xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 5ea0b5e6f37e8d9d2e418d5b0976248bd3e12eb3)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i;
40 
41   PetscFunctionBegin;
42   if (!a->diag) {
43     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   }
45   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60   if (!blockcompressed) {
61     /* malloc & create the natural set of indices */
62     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
63     for (i=0; i<n+1; i++) {
64       for (j=0; j<bs; j++) {
65         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
66       }
67     }
68     for (i=0; i<nz; i++) {
69       for (j=0; j<bs; j++) {
70         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
71       }
72     }
73   } else { /* blockcompressed */
74     if (oshift == 1) {
75       /* temporarily add 1 to i and j indices */
76       for (i=0; i<nz; i++) a->j[i]++;
77       for (i=0; i<n+1; i++) a->i[i]++;
78     }
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
88 {
89   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90   PetscInt     i,n = a->mbs,nz = a->i[n];
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95 
96   if (!blockcompressed) {
97     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) { /* blockcompressed */
99     for (i=0; i<nz; i++) a->j[i]--;
100     for (i=0; i<n+1; i++) a->i[i]--;
101   }
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
116 #endif
117   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
118   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
119   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
120   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
121   ierr = PetscFree(a->diag);CHKERRQ(ierr);
122   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
123   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
125   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
126   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
127   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
128   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
129 
130   ierr = PetscFree(a->inew);CHKERRQ(ierr);
131   ierr = PetscFree(a);CHKERRQ(ierr);
132 
133   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
146 {
147   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   switch (op) {
152   case MAT_ROW_ORIENTED:
153     a->roworiented = PETSC_TRUE;
154     break;
155   case MAT_COLUMN_ORIENTED:
156     a->roworiented = PETSC_FALSE;
157     break;
158   case MAT_COLUMNS_SORTED:
159     a->sorted = PETSC_TRUE;
160     break;
161   case MAT_COLUMNS_UNSORTED:
162     a->sorted = PETSC_FALSE;
163     break;
164   case MAT_KEEP_ZEROED_ROWS:
165     a->keepzeroedrows = PETSC_TRUE;
166     break;
167   case MAT_NO_NEW_NONZERO_LOCATIONS:
168     a->nonew = 1;
169     break;
170   case MAT_NEW_NONZERO_LOCATION_ERR:
171     a->nonew = -1;
172     break;
173   case MAT_NEW_NONZERO_ALLOCATION_ERR:
174     a->nonew = -2;
175     break;
176   case MAT_YES_NEW_NONZERO_LOCATIONS:
177     a->nonew = 0;
178     break;
179   case MAT_ROWS_SORTED:
180   case MAT_ROWS_UNSORTED:
181   case MAT_YES_NEW_DIAGONALS:
182   case MAT_IGNORE_OFF_PROC_ENTRIES:
183   case MAT_USE_HASH_TABLE:
184     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
185     break;
186   case MAT_NO_NEW_DIAGONALS:
187     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
188   case MAT_NOT_SYMMETRIC:
189   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
190   case MAT_HERMITIAN:
191     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
192   case MAT_SYMMETRIC:
193   case MAT_STRUCTURALLY_SYMMETRIC:
194   case MAT_NOT_HERMITIAN:
195   case MAT_SYMMETRY_ETERNAL:
196   case MAT_NOT_SYMMETRY_ETERNAL:
197     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
198     break;
199   case MAT_IGNORE_LOWER_TRIANGULAR:
200     a->ignore_ltriangular = PETSC_TRUE;
201     break;
202   case MAT_ERROR_LOWER_TRIANGULAR:
203     a->ignore_ltriangular = PETSC_FALSE;
204     break;
205   case MAT_GETROW_UPPERTRIANGULAR:
206     a->getrow_utriangular = PETSC_TRUE;
207     break;
208   default:
209     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
217 {
218   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
219   PetscErrorCode ierr;
220   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
221   MatScalar      *aa,*aa_i;
222   PetscScalar    *v_i;
223 
224   PetscFunctionBegin;
225   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
226   /* Get the upper triangular part of the row */
227   bs  = A->rmap.bs;
228   ai  = a->i;
229   aj  = a->j;
230   aa  = a->a;
231   bs2 = a->bs2;
232 
233   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
234 
235   bn  = row/bs;   /* Block number */
236   bp  = row % bs; /* Block position */
237   M   = ai[bn+1] - ai[bn];
238   *ncols = bs*M;
239 
240   if (v) {
241     *v = 0;
242     if (*ncols) {
243       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
244       for (i=0; i<M; i++) { /* for each block in the block row */
245         v_i  = *v + i*bs;
246         aa_i = aa + bs2*(ai[bn] + i);
247         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
248       }
249     }
250   }
251 
252   if (cols) {
253     *cols = 0;
254     if (*ncols) {
255       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
256       for (i=0; i<M; i++) { /* for each block in the block row */
257         cols_i = *cols + i*bs;
258         itmp  = bs*aj[ai[bn] + i];
259         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
260       }
261     }
262   }
263 
264   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
265   /* this segment is currently removed, so only entries in the upper triangle are obtained */
266 #ifdef column_search
267   v_i    = *v    + M*bs;
268   cols_i = *cols + M*bs;
269   for (i=0; i<bn; i++){ /* for each block row */
270     M = ai[i+1] - ai[i];
271     for (j=0; j<M; j++){
272       itmp = aj[ai[i] + j];    /* block column value */
273       if (itmp == bn){
274         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
275         for (k=0; k<bs; k++) {
276           *cols_i++ = i*bs+k;
277           *v_i++    = aa_i[k];
278         }
279         *ncols += bs;
280         break;
281       }
282     }
283   }
284 #endif
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
291 {
292   PetscErrorCode ierr;
293 
294   PetscFunctionBegin;
295   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
296   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
303 {
304   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
305 
306   PetscFunctionBegin;
307   a->getrow_utriangular = PETSC_TRUE;
308   PetscFunctionReturn(0);
309 }
310 #undef __FUNCT__
311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
313 {
314   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
315 
316   PetscFunctionBegin;
317   a->getrow_utriangular = PETSC_FALSE;
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
324 {
325   PetscErrorCode ierr;
326   PetscFunctionBegin;
327   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
334 {
335   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
336   PetscErrorCode    ierr;
337   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
338   const char        *name;
339   PetscViewerFormat format;
340 
341   PetscFunctionBegin;
342   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
343   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
344   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
345     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
346   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
347     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
348   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
349     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
350     for (i=0; i<a->mbs; i++) {
351       for (j=0; j<bs; j++) {
352         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
353         for (k=a->i[i]; k<a->i[i+1]; k++) {
354           for (l=0; l<bs; l++) {
355 #if defined(PETSC_USE_COMPLEX)
356             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
357               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
358                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
359             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
360               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
361                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
362             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
363               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             }
365 #else
366             if (a->a[bs2*k + l*bs + j] != 0.0) {
367               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
368             }
369 #endif
370           }
371         }
372         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
373       }
374     }
375     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
376   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
377      PetscFunctionReturn(0);
378   } else {
379     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
380     for (i=0; i<a->mbs; i++) {
381       for (j=0; j<bs; j++) {
382         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
383         for (k=a->i[i]; k<a->i[i+1]; k++) {
384           for (l=0; l<bs; l++) {
385 #if defined(PETSC_USE_COMPLEX)
386             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
387               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
388                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
391                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else {
393               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
394             }
395 #else
396             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
397 #endif
398           }
399         }
400         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
401       }
402     }
403     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
404   }
405   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
411 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
412 {
413   Mat            A = (Mat) Aa;
414   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
415   PetscErrorCode ierr;
416   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
417   PetscMPIInt    rank;
418   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
419   MatScalar      *aa;
420   MPI_Comm       comm;
421   PetscViewer    viewer;
422 
423   PetscFunctionBegin;
424   /*
425     This is nasty. If this is called from an originally parallel matrix
426     then all processes call this,but only the first has the matrix so the
427     rest should return immediately.
428   */
429   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
430   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
431   if (rank) PetscFunctionReturn(0);
432 
433   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
434 
435   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
436   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
437 
438   /* loop over matrix elements drawing boxes */
439   color = PETSC_DRAW_BLUE;
440   for (i=0,row=0; i<mbs; i++,row+=bs) {
441     for (j=a->i[i]; j<a->i[i+1]; j++) {
442       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
443       x_l = a->j[j]*bs; x_r = x_l + 1.0;
444       aa = a->a + j*bs2;
445       for (k=0; k<bs; k++) {
446         for (l=0; l<bs; l++) {
447           if (PetscRealPart(*aa++) >=  0.) continue;
448           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453   color = PETSC_DRAW_CYAN;
454   for (i=0,row=0; i<mbs; i++,row+=bs) {
455     for (j=a->i[i]; j<a->i[i+1]; j++) {
456       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
457       x_l = a->j[j]*bs; x_r = x_l + 1.0;
458       aa = a->a + j*bs2;
459       for (k=0; k<bs; k++) {
460         for (l=0; l<bs; l++) {
461           if (PetscRealPart(*aa++) != 0.) continue;
462           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
463         }
464       }
465     }
466   }
467 
468   color = PETSC_DRAW_RED;
469   for (i=0,row=0; i<mbs; i++,row+=bs) {
470     for (j=a->i[i]; j<a->i[i+1]; j++) {
471       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
472       x_l = a->j[j]*bs; x_r = x_l + 1.0;
473       aa = a->a + j*bs2;
474       for (k=0; k<bs; k++) {
475         for (l=0; l<bs; l++) {
476           if (PetscRealPart(*aa++) <= 0.) continue;
477           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
478         }
479       }
480     }
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
487 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
488 {
489   PetscErrorCode ierr;
490   PetscReal      xl,yl,xr,yr,w,h;
491   PetscDraw      draw;
492   PetscTruth     isnull;
493 
494   PetscFunctionBegin;
495 
496   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
497   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
498 
499   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
500   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
501   xr += w;    yr += h;  xl = -w;     yl = -h;
502   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
503   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
504   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatView_SeqSBAIJ"
510 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
511 {
512   PetscErrorCode ierr;
513   PetscTruth     iascii,isdraw;
514 
515   PetscFunctionBegin;
516   if (A->factor){
517     ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
518   }
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
520   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
521   if (iascii){
522     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
523   } else if (isdraw) {
524     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
525   } else {
526     Mat B;
527     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
528     ierr = MatView(B,viewer);CHKERRQ(ierr);
529     ierr = MatDestroy(B);CHKERRQ(ierr);
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
538 {
539   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
540   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
541   PetscInt     *ai = a->i,*ailen = a->ilen;
542   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
543   MatScalar    *ap,*aa = a->a,zero = 0.0;
544 
545   PetscFunctionBegin;
546   for (k=0; k<m; k++) { /* loop over rows */
547     row  = im[k]; brow = row/bs;
548     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
549     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
550     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
551     nrow = ailen[brow];
552     for (l=0; l<n; l++) { /* loop over columns */
553       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
554       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
555       col  = in[l] ;
556       bcol = col/bs;
557       cidx = col%bs;
558       ridx = row%bs;
559       high = nrow;
560       low  = 0; /* assume unsorted */
561       while (high-low > 5) {
562         t = (low+high)/2;
563         if (rp[t] > bcol) high = t;
564         else             low  = t;
565       }
566       for (i=low; i<high; i++) {
567         if (rp[i] > bcol) break;
568         if (rp[i] == bcol) {
569           *v++ = ap[bs2*i+bs*cidx+ridx];
570           goto finished;
571         }
572       }
573       *v++ = zero;
574        finished:;
575     }
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
584 {
585   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
586   PetscErrorCode  ierr;
587   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
588   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
589   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
590   PetscTruth      roworiented=a->roworiented;
591   const MatScalar *value = v;
592   MatScalar       *ap,*aa = a->a,*bap;
593 
594   PetscFunctionBegin;
595   if (roworiented) {
596     stepval = (n-1)*bs;
597   } else {
598     stepval = (m-1)*bs;
599   }
600   for (k=0; k<m; k++) { /* loop over added rows */
601     row  = im[k];
602     if (row < 0) continue;
603 #if defined(PETSC_USE_DEBUG)
604     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
605 #endif
606     rp   = aj + ai[row];
607     ap   = aa + bs2*ai[row];
608     rmax = imax[row];
609     nrow = ailen[row];
610     low  = 0;
611     high = nrow;
612     for (l=0; l<n; l++) { /* loop over added columns */
613       if (in[l] < 0) continue;
614       col = in[l];
615 #if defined(PETSC_USE_DEBUG)
616       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
617 #endif
618       if (col < row) continue; /* ignore lower triangular block */
619       if (roworiented) {
620         value = v + k*(stepval+bs)*bs + l*bs;
621       } else {
622         value = v + l*(stepval+bs)*bs + k*bs;
623       }
624       if (col <= lastcol) low = 0; else high = nrow;
625       lastcol = col;
626       while (high-low > 7) {
627         t = (low+high)/2;
628         if (rp[t] > col) high = t;
629         else             low  = t;
630       }
631       for (i=low; i<high; i++) {
632         if (rp[i] > col) break;
633         if (rp[i] == col) {
634           bap  = ap +  bs2*i;
635           if (roworiented) {
636             if (is == ADD_VALUES) {
637               for (ii=0; ii<bs; ii++,value+=stepval) {
638                 for (jj=ii; jj<bs2; jj+=bs) {
639                   bap[jj] += *value++;
640                 }
641               }
642             } else {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] = *value++;
646                 }
647                }
648             }
649           } else {
650             if (is == ADD_VALUES) {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=0; jj<bs; jj++) {
653                   *bap++ += *value++;
654                 }
655               }
656             } else {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++  = *value++;
660                 }
661               }
662             }
663           }
664           goto noinsert2;
665         }
666       }
667       if (nonew == 1) goto noinsert2;
668       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
669       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
670       N = nrow++ - 1; high++;
671       /* shift up all the later entries in this row */
672       for (ii=N; ii>=i; ii--) {
673         rp[ii+1] = rp[ii];
674         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
675       }
676       if (N >= i) {
677         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
678       }
679       rp[i] = col;
680       bap   = ap +  bs2*i;
681       if (roworiented) {
682         for (ii=0; ii<bs; ii++,value+=stepval) {
683           for (jj=ii; jj<bs2; jj+=bs) {
684             bap[jj] = *value++;
685           }
686         }
687       } else {
688         for (ii=0; ii<bs; ii++,value+=stepval) {
689           for (jj=0; jj<bs; jj++) {
690             *bap++  = *value++;
691           }
692         }
693        }
694     noinsert2:;
695       low = i;
696     }
697     ailen[row] = nrow;
698   }
699    PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNCT__
703 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
704 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
705 {
706   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
707   PetscErrorCode ierr;
708   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
709   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
710   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
711   MatScalar      *aa = a->a,*ap;
712 
713   PetscFunctionBegin;
714   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
715 
716   if (m) rmax = ailen[0];
717   for (i=1; i<mbs; i++) {
718     /* move each row back by the amount of empty slots (fshift) before it*/
719     fshift += imax[i-1] - ailen[i-1];
720      rmax   = PetscMax(rmax,ailen[i]);
721      if (fshift) {
722        ip = aj + ai[i]; ap = aa + bs2*ai[i];
723        N = ailen[i];
724        for (j=0; j<N; j++) {
725          ip[j-fshift] = ip[j];
726          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
727        }
728      }
729      ai[i] = ai[i-1] + ailen[i-1];
730   }
731   if (mbs) {
732     fshift += imax[mbs-1] - ailen[mbs-1];
733      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
734   }
735   /* reset ilen and imax for each row */
736   for (i=0; i<mbs; i++) {
737     ailen[i] = imax[i] = ai[i+1] - ai[i];
738   }
739   a->nz = ai[mbs];
740 
741   /* diagonals may have moved, reset it */
742   if (a->diag) {
743     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
744   }
745   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
746   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
747   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
748   a->reallocs          = 0;
749   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
750   PetscFunctionReturn(0);
751 }
752 
753 /*
754    This function returns an array of flags which indicate the locations of contiguous
755    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
756    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
757    Assume: sizes should be long enough to hold all the values.
758 */
759 #undef __FUNCT__
760 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
761 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
762 {
763   PetscInt   i,j,k,row;
764   PetscTruth flg;
765 
766   PetscFunctionBegin;
767    for (i=0,j=0; i<n; j++) {
768      row = idx[i];
769      if (row%bs!=0) { /* Not the begining of a block */
770        sizes[j] = 1;
771        i++;
772      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
773        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
774        i++;
775      } else { /* Begining of the block, so check if the complete block exists */
776        flg = PETSC_TRUE;
777        for (k=1; k<bs; k++) {
778          if (row+k != idx[i+k]) { /* break in the block */
779            flg = PETSC_FALSE;
780            break;
781          }
782        }
783        if (flg) { /* No break in the bs */
784          sizes[j] = bs;
785          i+= bs;
786        } else {
787          sizes[j] = 1;
788          i++;
789        }
790      }
791    }
792    *bs_max = j;
793    PetscFunctionReturn(0);
794 }
795 
796 
797 /* Only add/insert a(i,j) with i<=j (blocks).
798    Any a(i,j) with i>j input by user is ingored.
799 */
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
803 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
804 {
805   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
806   PetscErrorCode ierr;
807   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
808   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
809   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
810   PetscInt       ridx,cidx,bs2=a->bs2;
811   MatScalar      *ap,value,*aa=a->a,*bap;
812 
813   PetscFunctionBegin;
814   for (k=0; k<m; k++) { /* loop over added rows */
815     row  = im[k];       /* row number */
816     brow = row/bs;      /* block row number */
817     if (row < 0) continue;
818 #if defined(PETSC_USE_DEBUG)
819     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
820 #endif
821     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
822     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
823     rmax = imax[brow];  /* maximum space allocated for this row */
824     nrow = ailen[brow]; /* actual length of this row */
825     low  = 0;
826 
827     for (l=0; l<n; l++) { /* loop over added columns */
828       if (in[l] < 0) continue;
829 #if defined(PETSC_USE_DEBUG)
830       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
831 #endif
832       col = in[l];
833       bcol = col/bs;              /* block col number */
834 
835       if (brow > bcol) {
836         if (a->ignore_ltriangular){
837           continue; /* ignore lower triangular values */
838         } else {
839           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
840         }
841       }
842 
843       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
844       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
845         /* element value a(k,l) */
846         if (roworiented) {
847           value = v[l + k*n];
848         } else {
849           value = v[k + l*m];
850         }
851 
852         /* move pointer bap to a(k,l) quickly and add/insert value */
853         if (col <= lastcol) low = 0; high = nrow;
854         lastcol = col;
855         while (high-low > 7) {
856           t = (low+high)/2;
857           if (rp[t] > bcol) high = t;
858           else              low  = t;
859         }
860         for (i=low; i<high; i++) {
861           if (rp[i] > bcol) break;
862           if (rp[i] == bcol) {
863             bap  = ap +  bs2*i + bs*cidx + ridx;
864             if (is == ADD_VALUES) *bap += value;
865             else                  *bap  = value;
866             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
867             if (brow == bcol && ridx < cidx){
868               bap  = ap +  bs2*i + bs*ridx + cidx;
869               if (is == ADD_VALUES) *bap += value;
870               else                  *bap  = value;
871             }
872             goto noinsert1;
873           }
874         }
875 
876         if (nonew == 1) goto noinsert1;
877         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
878         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
879 
880         N = nrow++ - 1; high++;
881         /* shift up all the later entries in this row */
882         for (ii=N; ii>=i; ii--) {
883           rp[ii+1] = rp[ii];
884           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
885         }
886         if (N>=i) {
887           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
888         }
889         rp[i]                      = bcol;
890         ap[bs2*i + bs*cidx + ridx] = value;
891       noinsert1:;
892         low = i;
893       }
894     }   /* end of loop over added columns */
895     ailen[brow] = nrow;
896   }   /* end of loop over added rows */
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
902 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
903 {
904   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
905   Mat            outA;
906   PetscErrorCode ierr;
907   PetscTruth     row_identity;
908 
909   PetscFunctionBegin;
910   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
911   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
912   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
913   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
914 
915   outA        = inA;
916   inA->factor = FACTOR_CHOLESKY;
917 
918   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
919   /*
920     Blocksize < 8 have a special faster factorization/solver
921     for ICC(0) factorization with natural ordering
922   */
923   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
924   case 1:
925     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
926     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
927     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
928     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
929     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
930     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
931     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
932     break;
933   case 2:
934     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
935     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
936     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
937     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
938     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
939     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
940     break;
941   case 3:
942      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
943      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
944      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
945      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
946      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
947      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
948      break;
949   case 4:
950     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
951     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
952     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
953     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
954     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
955     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
956     break;
957   case 5:
958     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
959     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
960     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
961     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
962     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
963     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
964     break;
965   case 6:
966     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
967     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
968     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
969     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
970     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
971     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
972     break;
973   case 7:
974     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
975     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
976     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
977     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
978     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
979     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
980     break;
981   default:
982     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
983     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
984     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
985     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
986     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
987     break;
988   }
989 
990   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
991   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
992   a->row = row;
993   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
994   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
995   a->col = row;
996 
997   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
998   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
999   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1000 
1001   if (!a->solve_work) {
1002     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1003     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1004   }
1005 
1006   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 EXTERN_C_BEGIN
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1013 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1014 {
1015   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1016   PetscInt     i,nz,n;
1017 
1018   PetscFunctionBegin;
1019   nz = baij->maxnz;
1020   n  = mat->cmap.n;
1021   for (i=0; i<nz; i++) {
1022     baij->j[i] = indices[i];
1023   }
1024    baij->nz = nz;
1025    for (i=0; i<n; i++) {
1026      baij->ilen[i] = baij->imax[i];
1027    }
1028    PetscFunctionReturn(0);
1029 }
1030 EXTERN_C_END
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1034 /*@
1035   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1036   in the matrix.
1037 
1038   Input Parameters:
1039   +  mat     - the SeqSBAIJ matrix
1040   -  indices - the column indices
1041 
1042   Level: advanced
1043 
1044   Notes:
1045   This can be called if you have precomputed the nonzero structure of the
1046   matrix and want to provide it to the matrix object to improve the performance
1047   of the MatSetValues() operation.
1048 
1049   You MUST have set the correct numbers of nonzeros per row in the call to
1050   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1051 
1052   MUST be called before any calls to MatSetValues()
1053 
1054   .seealso: MatCreateSeqSBAIJ
1055 @*/
1056 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1057 {
1058   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1059 
1060   PetscFunctionBegin;
1061   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1062   PetscValidPointer(indices,2);
1063   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1064   if (f) {
1065     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1066   } else {
1067     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1074 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1075 {
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   /* If the two matrices have the same copy implementation, use fast copy. */
1080   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1081     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1082     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1083 
1084     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1085       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1086     }
1087     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1088   } else {
1089     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1090     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1091     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1098 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1099 {
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1109 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1110 {
1111   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1112   PetscFunctionBegin;
1113   *array = a->a;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1119 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1120 {
1121   PetscFunctionBegin;
1122   PetscFunctionReturn(0);
1123  }
1124 
1125 #include "petscblaslapack.h"
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1128 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1129 {
1130   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1131   PetscErrorCode ierr;
1132   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1133   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1134 
1135   PetscFunctionBegin;
1136   if (str == SAME_NONZERO_PATTERN) {
1137     PetscScalar alpha = a;
1138     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1139   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1140     if (y->xtoy && y->XtoY != X) {
1141       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1142       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1143     }
1144     if (!y->xtoy) { /* get xtoy */
1145       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1146       y->XtoY = X;
1147     }
1148     bs2 = bs*bs;
1149     for (i=0; i<x->nz; i++) {
1150       j = 0;
1151       while (j < bs2){
1152         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1153         j++;
1154       }
1155     }
1156     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1157   } else {
1158     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1159     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1160     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1161   }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1167 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1168 {
1169   PetscFunctionBegin;
1170   *flg = PETSC_TRUE;
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1176 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1177 {
1178    PetscFunctionBegin;
1179    *flg = PETSC_TRUE;
1180    PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1185 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1186  {
1187    PetscFunctionBegin;
1188    *flg = PETSC_FALSE;
1189    PetscFunctionReturn(0);
1190  }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1194 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1195 {
1196   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1197   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1198   PetscScalar    *aa = a->a;
1199 
1200   PetscFunctionBegin;
1201   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1207 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1208 {
1209   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1210   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1211   PetscScalar    *aa = a->a;
1212 
1213   PetscFunctionBegin;
1214   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /* -------------------------------------------------------------------*/
1219 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1220        MatGetRow_SeqSBAIJ,
1221        MatRestoreRow_SeqSBAIJ,
1222        MatMult_SeqSBAIJ_N,
1223 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1224        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1225        MatMultAdd_SeqSBAIJ_N,
1226        MatSolve_SeqSBAIJ_N,
1227        0,
1228        0,
1229 /*10*/ 0,
1230        0,
1231        MatCholeskyFactor_SeqSBAIJ,
1232        MatRelax_SeqSBAIJ,
1233        MatTranspose_SeqSBAIJ,
1234 /*15*/ MatGetInfo_SeqSBAIJ,
1235        MatEqual_SeqSBAIJ,
1236        MatGetDiagonal_SeqSBAIJ,
1237        MatDiagonalScale_SeqSBAIJ,
1238        MatNorm_SeqSBAIJ,
1239 /*20*/ 0,
1240        MatAssemblyEnd_SeqSBAIJ,
1241        0,
1242        MatSetOption_SeqSBAIJ,
1243        MatZeroEntries_SeqSBAIJ,
1244 /*25*/ 0,
1245        0,
1246        0,
1247        MatCholeskyFactorSymbolic_SeqSBAIJ,
1248        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1249 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1250        0,
1251        MatICCFactorSymbolic_SeqSBAIJ,
1252        MatGetArray_SeqSBAIJ,
1253        MatRestoreArray_SeqSBAIJ,
1254 /*35*/ MatDuplicate_SeqSBAIJ,
1255        MatForwardSolve_SeqSBAIJ_N,
1256        MatBackwardSolve_SeqSBAIJ_N,
1257        0,
1258        MatICCFactor_SeqSBAIJ,
1259 /*40*/ MatAXPY_SeqSBAIJ,
1260        MatGetSubMatrices_SeqSBAIJ,
1261        MatIncreaseOverlap_SeqSBAIJ,
1262        MatGetValues_SeqSBAIJ,
1263        MatCopy_SeqSBAIJ,
1264 /*45*/ 0,
1265        MatScale_SeqSBAIJ,
1266        0,
1267        0,
1268        0,
1269 /*50*/ 0,
1270        MatGetRowIJ_SeqSBAIJ,
1271        MatRestoreRowIJ_SeqSBAIJ,
1272        0,
1273        0,
1274 /*55*/ 0,
1275        0,
1276        0,
1277        0,
1278        MatSetValuesBlocked_SeqSBAIJ,
1279 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1280        0,
1281        0,
1282        0,
1283        0,
1284 /*65*/ 0,
1285        0,
1286        0,
1287        0,
1288        0,
1289 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1290        0,
1291        0,
1292        0,
1293        0,
1294 /*75*/ 0,
1295        0,
1296        0,
1297        0,
1298        0,
1299 /*80*/ 0,
1300        0,
1301        0,
1302 #if !defined(PETSC_USE_COMPLEX)
1303        MatGetInertia_SeqSBAIJ,
1304 #else
1305        0,
1306 #endif
1307        MatLoad_SeqSBAIJ,
1308 /*85*/ MatIsSymmetric_SeqSBAIJ,
1309        MatIsHermitian_SeqSBAIJ,
1310        MatIsStructurallySymmetric_SeqSBAIJ,
1311        0,
1312        0,
1313 /*90*/ 0,
1314        0,
1315        0,
1316        0,
1317        0,
1318 /*95*/ 0,
1319        0,
1320        0,
1321        0,
1322        0,
1323 /*100*/0,
1324        0,
1325        0,
1326        0,
1327        0,
1328 /*105*/0,
1329        MatRealPart_SeqSBAIJ,
1330        MatImaginaryPart_SeqSBAIJ,
1331        MatGetRowUpperTriangular_SeqSBAIJ,
1332        MatRestoreRowUpperTriangular_SeqSBAIJ
1333 };
1334 
1335 EXTERN_C_BEGIN
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1338 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1339 {
1340   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1341   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   if (aij->nonew != 1) {
1346     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1347   }
1348 
1349   /* allocate space for values if not already there */
1350   if (!aij->saved_values) {
1351     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1352   }
1353 
1354   /* copy values over */
1355   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 EXTERN_C_END
1359 
1360 EXTERN_C_BEGIN
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1363 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1364 {
1365   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1366   PetscErrorCode ierr;
1367   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1368 
1369   PetscFunctionBegin;
1370   if (aij->nonew != 1) {
1371     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1372   }
1373   if (!aij->saved_values) {
1374     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1375   }
1376 
1377   /* copy values over */
1378   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 EXTERN_C_END
1382 
1383 EXTERN_C_BEGIN
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1386 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1387 {
1388   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1389   PetscErrorCode ierr;
1390   PetscInt       i,mbs,bs2;
1391   PetscTruth     skipallocation = PETSC_FALSE,flg;
1392 
1393   PetscFunctionBegin;
1394   B->preallocated = PETSC_TRUE;
1395   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1396   B->rmap.bs = B->cmap.bs = bs;
1397   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1398   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1399 
1400   mbs  = B->rmap.N/bs;
1401   bs2  = bs*bs;
1402 
1403   if (mbs*bs != B->rmap.N) {
1404     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1405   }
1406 
1407   if (nz == MAT_SKIP_ALLOCATION) {
1408     skipallocation = PETSC_TRUE;
1409     nz             = 0;
1410   }
1411 
1412   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1413   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1414   if (nnz) {
1415     for (i=0; i<mbs; i++) {
1416       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1417       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1418     }
1419   }
1420 
1421   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1422   if (!flg) {
1423     switch (bs) {
1424     case 1:
1425       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1426       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1427       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1428       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1429       B->ops->mult             = MatMult_SeqSBAIJ_1;
1430       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1431       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1432       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1433       break;
1434     case 2:
1435       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1436       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1437       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1438       B->ops->mult             = MatMult_SeqSBAIJ_2;
1439       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1440       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1441       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1442       break;
1443     case 3:
1444       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1445       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1446       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1447       B->ops->mult             = MatMult_SeqSBAIJ_3;
1448       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1449       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1450       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1451       break;
1452     case 4:
1453       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1454       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1455       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1456       B->ops->mult             = MatMult_SeqSBAIJ_4;
1457       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1458       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1459       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1460       break;
1461     case 5:
1462       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1463       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1464       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1465       B->ops->mult             = MatMult_SeqSBAIJ_5;
1466       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1467       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1468       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1469       break;
1470     case 6:
1471       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1472       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1473       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1474       B->ops->mult             = MatMult_SeqSBAIJ_6;
1475       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1476       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1477       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1478       break;
1479     case 7:
1480       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1481       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1482       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1483       B->ops->mult             = MatMult_SeqSBAIJ_7;
1484       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1485       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1486       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1487       break;
1488     }
1489   }
1490 
1491   b->mbs = mbs;
1492   b->nbs = mbs;
1493   if (!skipallocation) {
1494     /* b->ilen will count nonzeros in each block row so far. */
1495     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1496     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1497     if (!nnz) {
1498       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1499       else if (nz <= 0)        nz = 1;
1500       for (i=0; i<mbs; i++) {
1501         b->imax[i] = nz;
1502       }
1503       nz = nz*mbs; /* total nz */
1504     } else {
1505       nz = 0;
1506       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1507     }
1508     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1509 
1510     /* allocate the matrix space */
1511     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1512     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1513     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1514     b->singlemalloc = PETSC_TRUE;
1515 
1516     /* pointer to beginning of each row */
1517     b->i[0] = 0;
1518     for (i=1; i<mbs+1; i++) {
1519       b->i[i] = b->i[i-1] + b->imax[i-1];
1520     }
1521     b->free_a     = PETSC_TRUE;
1522     b->free_ij    = PETSC_TRUE;
1523   } else {
1524     b->free_a     = PETSC_FALSE;
1525     b->free_ij    = PETSC_FALSE;
1526   }
1527 
1528   B->rmap.bs               = bs;
1529   b->bs2              = bs2;
1530   b->nz             = 0;
1531   b->maxnz          = nz*bs2;
1532 
1533   b->inew             = 0;
1534   b->jnew             = 0;
1535   b->anew             = 0;
1536   b->a2anew           = 0;
1537   b->permute          = PETSC_FALSE;
1538   PetscFunctionReturn(0);
1539 }
1540 EXTERN_C_END
1541 
1542 EXTERN_C_BEGIN
1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1544 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1545 EXTERN_C_END
1546 
1547 /*MC
1548   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1549   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1550 
1551   Options Database Keys:
1552   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1553 
1554   Level: beginner
1555 
1556   .seealso: MatCreateSeqSBAIJ
1557 M*/
1558 
1559 EXTERN_C_BEGIN
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1562 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1563 {
1564   Mat_SeqSBAIJ   *b;
1565   PetscErrorCode ierr;
1566   PetscMPIInt    size;
1567   PetscTruth     flg;
1568 
1569   PetscFunctionBegin;
1570   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1571   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1572 
1573   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1574   B->data = (void*)b;
1575   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1576   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1577   B->ops->view        = MatView_SeqSBAIJ;
1578   B->factor           = 0;
1579   B->mapping          = 0;
1580   b->row              = 0;
1581   b->icol             = 0;
1582   b->reallocs         = 0;
1583   b->saved_values     = 0;
1584 
1585 
1586   b->sorted           = PETSC_FALSE;
1587   b->roworiented      = PETSC_TRUE;
1588   b->nonew            = 0;
1589   b->diag             = 0;
1590   b->solve_work       = 0;
1591   b->mult_work        = 0;
1592   B->spptr            = 0;
1593   b->keepzeroedrows   = PETSC_FALSE;
1594   b->xtoy             = 0;
1595   b->XtoY             = 0;
1596 
1597   b->inew             = 0;
1598   b->jnew             = 0;
1599   b->anew             = 0;
1600   b->a2anew           = 0;
1601   b->permute          = PETSC_FALSE;
1602 
1603   b->ignore_ltriangular = PETSC_FALSE;
1604   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1605   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1606 
1607   b->getrow_utriangular = PETSC_FALSE;
1608   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1609   if (flg) b->getrow_utriangular = PETSC_TRUE;
1610 
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1612                                      "MatStoreValues_SeqSBAIJ",
1613                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1615                                      "MatRetrieveValues_SeqSBAIJ",
1616                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1618                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1619                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1621                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1622                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1624                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1625                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1627                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1628                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1629 
1630   B->symmetric                  = PETSC_TRUE;
1631   B->structurally_symmetric     = PETSC_TRUE;
1632   B->symmetric_set              = PETSC_TRUE;
1633   B->structurally_symmetric_set = PETSC_TRUE;
1634   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 EXTERN_C_END
1638 
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1641 /*@C
1642    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1643    compressed row) format.  For good matrix assembly performance the
1644    user should preallocate the matrix storage by setting the parameter nz
1645    (or the array nnz).  By setting these parameters accurately, performance
1646    during matrix assembly can be increased by more than a factor of 50.
1647 
1648    Collective on Mat
1649 
1650    Input Parameters:
1651 +  A - the symmetric matrix
1652 .  bs - size of block
1653 .  nz - number of block nonzeros per block row (same for all rows)
1654 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1655          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1656 
1657    Options Database Keys:
1658 .   -mat_no_unroll - uses code that does not unroll the loops in the
1659                      block calculations (much slower)
1660 .    -mat_block_size - size of the blocks to use
1661 
1662    Level: intermediate
1663 
1664    Notes:
1665    Specify the preallocated storage with either nz or nnz (not both).
1666    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1667    allocation.  For additional details, see the users manual chapter on
1668    matrices.
1669 
1670    If the nnz parameter is given then the nz parameter is ignored
1671 
1672 
1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1674 @*/
1675 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1676 {
1677   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1678 
1679   PetscFunctionBegin;
1680   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1681   if (f) {
1682     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatCreateSeqSBAIJ"
1689 /*@C
1690    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1691    compressed row) format.  For good matrix assembly performance the
1692    user should preallocate the matrix storage by setting the parameter nz
1693    (or the array nnz).  By setting these parameters accurately, performance
1694    during matrix assembly can be increased by more than a factor of 50.
1695 
1696    Collective on MPI_Comm
1697 
1698    Input Parameters:
1699 +  comm - MPI communicator, set to PETSC_COMM_SELF
1700 .  bs - size of block
1701 .  m - number of rows, or number of columns
1702 .  nz - number of block nonzeros per block row (same for all rows)
1703 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1704          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1705 
1706    Output Parameter:
1707 .  A - the symmetric matrix
1708 
1709    Options Database Keys:
1710 .   -mat_no_unroll - uses code that does not unroll the loops in the
1711                      block calculations (much slower)
1712 .    -mat_block_size - size of the blocks to use
1713 
1714    Level: intermediate
1715 
1716    Notes:
1717 
1718    Specify the preallocated storage with either nz or nnz (not both).
1719    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1720    allocation.  For additional details, see the users manual chapter on
1721    matrices.
1722 
1723    If the nnz parameter is given then the nz parameter is ignored
1724 
1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1726 @*/
1727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1728 {
1729   PetscErrorCode ierr;
1730 
1731   PetscFunctionBegin;
1732   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1733   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1734   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1735   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1741 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1742 {
1743   Mat            C;
1744   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1745   PetscErrorCode ierr;
1746   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1747 
1748   PetscFunctionBegin;
1749   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1750 
1751   *B = 0;
1752   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1753   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1754   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1755   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1756   c    = (Mat_SeqSBAIJ*)C->data;
1757 
1758   C->preallocated   = PETSC_TRUE;
1759   C->factor         = A->factor;
1760   c->row            = 0;
1761   c->icol           = 0;
1762   c->saved_values   = 0;
1763   c->keepzeroedrows = a->keepzeroedrows;
1764   C->assembled      = PETSC_TRUE;
1765 
1766   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1767   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1768   c->bs2  = a->bs2;
1769   c->mbs  = a->mbs;
1770   c->nbs  = a->nbs;
1771 
1772   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1773   for (i=0; i<mbs; i++) {
1774     c->imax[i] = a->imax[i];
1775     c->ilen[i] = a->ilen[i];
1776   }
1777 
1778   /* allocate the matrix space */
1779   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1780   c->singlemalloc = PETSC_TRUE;
1781   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1782   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1783   if (mbs > 0) {
1784     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1785     if (cpvalues == MAT_COPY_VALUES) {
1786       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1787     } else {
1788       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1789     }
1790   }
1791 
1792   c->sorted      = a->sorted;
1793   c->roworiented = a->roworiented;
1794   c->nonew       = a->nonew;
1795 
1796   if (a->diag) {
1797     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1798     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1799     for (i=0; i<mbs; i++) {
1800       c->diag[i] = a->diag[i];
1801     }
1802   } else c->diag  = 0;
1803   c->nz           = a->nz;
1804   c->maxnz        = a->maxnz;
1805   c->solve_work   = 0;
1806   c->mult_work    = 0;
1807   c->free_a       = PETSC_TRUE;
1808   c->free_ij      = PETSC_TRUE;
1809   *B = C;
1810   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1816 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1817 {
1818   Mat_SeqSBAIJ   *a;
1819   Mat            B;
1820   PetscErrorCode ierr;
1821   int            fd;
1822   PetscMPIInt    size;
1823   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1824   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1825   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1826   PetscInt       *masked,nmask,tmp,bs2,ishift;
1827   PetscScalar    *aa;
1828   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1829 
1830   PetscFunctionBegin;
1831   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1832   bs2  = bs*bs;
1833 
1834   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1835   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1836   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1837   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1838   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1839   M = header[1]; N = header[2]; nz = header[3];
1840 
1841   if (header[3] < 0) {
1842     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1843   }
1844 
1845   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1846 
1847   /*
1848      This code adds extra rows to make sure the number of rows is
1849     divisible by the blocksize
1850   */
1851   mbs        = M/bs;
1852   extra_rows = bs - M + bs*(mbs);
1853   if (extra_rows == bs) extra_rows = 0;
1854   else                  mbs++;
1855   if (extra_rows) {
1856     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1857   }
1858 
1859   /* read in row lengths */
1860   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1861   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1862   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1863 
1864   /* read in column indices */
1865   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1866   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1867   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1868 
1869   /* loop over row lengths determining block row lengths */
1870   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1871   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1872   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1873   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1874   masked   = mask + mbs;
1875   rowcount = 0; nzcount = 0;
1876   for (i=0; i<mbs; i++) {
1877     nmask = 0;
1878     for (j=0; j<bs; j++) {
1879       kmax = rowlengths[rowcount];
1880       for (k=0; k<kmax; k++) {
1881         tmp = jj[nzcount++]/bs;   /* block col. index */
1882         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1883       }
1884       rowcount++;
1885     }
1886     s_browlengths[i] += nmask;
1887 
1888     /* zero out the mask elements we set */
1889     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1890   }
1891 
1892   /* create our matrix */
1893   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1894   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1895   ierr = MatSetType(B,type);CHKERRQ(ierr);
1896   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1897   a = (Mat_SeqSBAIJ*)B->data;
1898 
1899   /* set matrix "i" values */
1900   a->i[0] = 0;
1901   for (i=1; i<= mbs; i++) {
1902     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1903     a->ilen[i-1] = s_browlengths[i-1];
1904   }
1905   a->nz = a->i[mbs];
1906 
1907   /* read in nonzero values */
1908   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1909   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1910   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1911 
1912   /* set "a" and "j" values into matrix */
1913   nzcount = 0; jcount = 0;
1914   for (i=0; i<mbs; i++) {
1915     nzcountb = nzcount;
1916     nmask    = 0;
1917     for (j=0; j<bs; j++) {
1918       kmax = rowlengths[i*bs+j];
1919       for (k=0; k<kmax; k++) {
1920         tmp = jj[nzcount++]/bs; /* block col. index */
1921         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1922       }
1923     }
1924     /* sort the masked values */
1925     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1926 
1927     /* set "j" values into matrix */
1928     maskcount = 1;
1929     for (j=0; j<nmask; j++) {
1930       a->j[jcount++]  = masked[j];
1931       mask[masked[j]] = maskcount++;
1932     }
1933 
1934     /* set "a" values into matrix */
1935     ishift = bs2*a->i[i];
1936     for (j=0; j<bs; j++) {
1937       kmax = rowlengths[i*bs+j];
1938       for (k=0; k<kmax; k++) {
1939         tmp       = jj[nzcountb]/bs ; /* block col. index */
1940         if (tmp >= i){
1941           block     = mask[tmp] - 1;
1942           point     = jj[nzcountb] - bs*tmp;
1943           idx       = ishift + bs2*block + j + bs*point;
1944           a->a[idx] = aa[nzcountb];
1945         }
1946         nzcountb++;
1947       }
1948     }
1949     /* zero out the mask elements we set */
1950     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1951   }
1952   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1953 
1954   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1955   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1956   ierr = PetscFree(aa);CHKERRQ(ierr);
1957   ierr = PetscFree(jj);CHKERRQ(ierr);
1958   ierr = PetscFree(mask);CHKERRQ(ierr);
1959 
1960   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatView_Private(B);CHKERRQ(ierr);
1963   *A = B;
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1969 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1970 {
1971   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1972   MatScalar      *aa=a->a,*v,*v1;
1973   PetscScalar    *x,*b,*t,sum,d;
1974   PetscErrorCode ierr;
1975   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1976   PetscInt       nz,nz1,*vj,*vj1,i;
1977 
1978   PetscFunctionBegin;
1979   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1980 
1981   its = its*lits;
1982   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1983 
1984   if (bs > 1)
1985     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1986 
1987   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1988   if (xx != bb) {
1989     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1990   } else {
1991     b = x;
1992   }
1993 
1994   if (!a->relax_work) {
1995     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
1996   }
1997   t = a->relax_work;
1998 
1999   if (flag & SOR_ZERO_INITIAL_GUESS) {
2000     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2001       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2002 
2003       for (i=0; i<m; i++){
2004         d  = *(aa + ai[i]);  /* diag[i] */
2005         v  = aa + ai[i] + 1;
2006         vj = aj + ai[i] + 1;
2007         nz = ai[i+1] - ai[i] - 1;
2008         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2009         x[i] = omega*t[i]/d;
2010         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2011       }
2012     }
2013 
2014     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2015       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2016         t = b;
2017       }
2018 
2019       for (i=m-1; i>=0; i--){
2020         d  = *(aa + ai[i]);
2021         v  = aa + ai[i] + 1;
2022         vj = aj + ai[i] + 1;
2023         nz = ai[i+1] - ai[i] - 1;
2024         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2025         sum = t[i];
2026         while (nz--) sum -= x[*vj++]*(*v++);
2027         x[i] =   (1-omega)*x[i] + omega*sum/d;
2028       }
2029       t = a->relax_work;
2030     }
2031     its--;
2032   }
2033 
2034   while (its--) {
2035     /*
2036        forward sweep:
2037        for i=0,...,m-1:
2038          sum[i] = (b[i] - U(i,:)x )/d[i];
2039          x[i]   = (1-omega)x[i] + omega*sum[i];
2040          b      = b - x[i]*U^T(i,:);
2041 
2042     */
2043     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2044       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2045 
2046       for (i=0; i<m; i++){
2047         d  = *(aa + ai[i]);  /* diag[i] */
2048         v  = aa + ai[i] + 1; v1=v;
2049         vj = aj + ai[i] + 1; vj1=vj;
2050         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2051         sum = t[i];
2052         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2053         while (nz1--) sum -= (*v1++)*x[*vj1++];
2054         x[i] = (1-omega)*x[i] + omega*sum/d;
2055         while (nz--) t[*vj++] -= x[i]*(*v++);
2056       }
2057     }
2058 
2059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2060       /*
2061        backward sweep:
2062        b = b - x[i]*U^T(i,:), i=0,...,n-2
2063        for i=m-1,...,0:
2064          sum[i] = (b[i] - U(i,:)x )/d[i];
2065          x[i]   = (1-omega)x[i] + omega*sum[i];
2066       */
2067       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2068       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2069 
2070       for (i=0; i<m-1; i++){  /* update rhs */
2071         v  = aa + ai[i] + 1;
2072         vj = aj + ai[i] + 1;
2073         nz = ai[i+1] - ai[i] - 1;
2074         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2075         while (nz--) t[*vj++] -= x[i]*(*v++);
2076       }
2077       for (i=m-1; i>=0; i--){
2078         d  = *(aa + ai[i]);
2079         v  = aa + ai[i] + 1;
2080         vj = aj + ai[i] + 1;
2081         nz = ai[i+1] - ai[i] - 1;
2082         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2083         sum = t[i];
2084         while (nz--) sum -= x[*vj++]*(*v++);
2085         x[i] =   (1-omega)*x[i] + omega*sum/d;
2086       }
2087     }
2088   }
2089 
2090   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2091   if (bb != xx) {
2092     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2099 /*@
2100      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2101               (upper triangular entries in CSR format) provided by the user.
2102 
2103      Collective on MPI_Comm
2104 
2105    Input Parameters:
2106 +  comm - must be an MPI communicator of size 1
2107 .  bs - size of block
2108 .  m - number of rows
2109 .  n - number of columns
2110 .  i - row indices
2111 .  j - column indices
2112 -  a - matrix values
2113 
2114    Output Parameter:
2115 .  mat - the matrix
2116 
2117    Level: intermediate
2118 
2119    Notes:
2120        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2121     once the matrix is destroyed
2122 
2123        You cannot set new nonzero locations into this matrix, that will generate an error.
2124 
2125        The i and j indices are 0 based
2126 
2127 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2128 
2129 @*/
2130 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2131 {
2132   PetscErrorCode ierr;
2133   PetscInt       ii;
2134   Mat_SeqSBAIJ   *sbaij;
2135 
2136   PetscFunctionBegin;
2137   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2138   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2139 
2140   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2141   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2142   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2143   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2144   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2145   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2146 
2147   sbaij->i = i;
2148   sbaij->j = j;
2149   sbaij->a = a;
2150   sbaij->singlemalloc = PETSC_FALSE;
2151   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2152   sbaij->free_a       = PETSC_FALSE;
2153   sbaij->free_ij      = PETSC_FALSE;
2154 
2155   for (ii=0; ii<m; ii++) {
2156     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2157 #if defined(PETSC_USE_DEBUG)
2158     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2159 #endif
2160   }
2161 #if defined(PETSC_USE_DEBUG)
2162   for (ii=0; ii<sbaij->i[m]; ii++) {
2163     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2164     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2165   }
2166 #endif
2167 
2168   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 
2174 
2175 
2176 
2177