xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision e3ed6ec877bedf4e837ee0c4ff2272f4487e99bf)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i;
40 
41   PetscFunctionBegin;
42   if (!a->diag) {
43     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   }
45   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60   if (!blockcompressed) {
61     /* malloc & create the natural set of indices */
62     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
63     for (i=0; i<n+1; i++) {
64       for (j=0; j<bs; j++) {
65         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
66       }
67     }
68     for (i=0; i<nz; i++) {
69       for (j=0; j<bs; j++) {
70         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
71       }
72     }
73   } else { /* blockcompressed */
74     if (oshift == 1) {
75       /* temporarily add 1 to i and j indices */
76       for (i=0; i<nz; i++) a->j[i]++;
77       for (i=0; i<n+1; i++) a->i[i]++;
78     }
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
88 {
89   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90   PetscInt     i,n = a->mbs,nz = a->i[n];
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95 
96   if (!blockcompressed) {
97     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) { /* blockcompressed */
99     for (i=0; i<nz; i++) a->j[i]--;
100     for (i=0; i<n+1; i++) a->i[i]--;
101   }
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
116 #endif
117   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
118   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
119   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
120   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
121   ierr = PetscFree(a->diag);CHKERRQ(ierr);
122   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
123   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
125   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
126   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
127   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
128   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
129 
130   ierr = PetscFree(a->inew);CHKERRQ(ierr);
131   ierr = PetscFree(a);CHKERRQ(ierr);
132 
133   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
146 {
147   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   switch (op) {
152   case MAT_ROW_ORIENTED:
153     a->roworiented = PETSC_TRUE;
154     break;
155   case MAT_COLUMN_ORIENTED:
156     a->roworiented = PETSC_FALSE;
157     break;
158   case MAT_COLUMNS_SORTED:
159     a->sorted = PETSC_TRUE;
160     break;
161   case MAT_COLUMNS_UNSORTED:
162     a->sorted = PETSC_FALSE;
163     break;
164   case MAT_KEEP_ZEROED_ROWS:
165     a->keepzeroedrows = PETSC_TRUE;
166     break;
167   case MAT_NO_NEW_NONZERO_LOCATIONS:
168     a->nonew = 1;
169     break;
170   case MAT_NEW_NONZERO_LOCATION_ERR:
171     a->nonew = -1;
172     break;
173   case MAT_NEW_NONZERO_ALLOCATION_ERR:
174     a->nonew = -2;
175     break;
176   case MAT_YES_NEW_NONZERO_LOCATIONS:
177     a->nonew = 0;
178     break;
179   case MAT_ROWS_SORTED:
180   case MAT_ROWS_UNSORTED:
181   case MAT_YES_NEW_DIAGONALS:
182   case MAT_IGNORE_OFF_PROC_ENTRIES:
183   case MAT_USE_HASH_TABLE:
184     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
185     break;
186   case MAT_NO_NEW_DIAGONALS:
187     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
188   case MAT_NOT_SYMMETRIC:
189   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
190   case MAT_HERMITIAN:
191     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
192   case MAT_SYMMETRIC:
193   case MAT_STRUCTURALLY_SYMMETRIC:
194   case MAT_NOT_HERMITIAN:
195   case MAT_SYMMETRY_ETERNAL:
196   case MAT_NOT_SYMMETRY_ETERNAL:
197     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
198     break;
199   case MAT_IGNORE_LOWER_TRIANGULAR:
200     a->ignore_ltriangular = PETSC_TRUE;
201     break;
202   case MAT_ERROR_LOWER_TRIANGULAR:
203     a->ignore_ltriangular = PETSC_FALSE;
204     break;
205   case MAT_GETROW_UPPERTRIANGULAR:
206     a->getrow_utriangular = PETSC_TRUE;
207     break;
208   default:
209     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
217 {
218   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
219   PetscErrorCode ierr;
220   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
221   MatScalar      *aa,*aa_i;
222   PetscScalar    *v_i;
223 
224   PetscFunctionBegin;
225   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
226   /* Get the upper triangular part of the row */
227   bs  = A->rmap.bs;
228   ai  = a->i;
229   aj  = a->j;
230   aa  = a->a;
231   bs2 = a->bs2;
232 
233   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
234 
235   bn  = row/bs;   /* Block number */
236   bp  = row % bs; /* Block position */
237   M   = ai[bn+1] - ai[bn];
238   *ncols = bs*M;
239 
240   if (v) {
241     *v = 0;
242     if (*ncols) {
243       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
244       for (i=0; i<M; i++) { /* for each block in the block row */
245         v_i  = *v + i*bs;
246         aa_i = aa + bs2*(ai[bn] + i);
247         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
248       }
249     }
250   }
251 
252   if (cols) {
253     *cols = 0;
254     if (*ncols) {
255       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
256       for (i=0; i<M; i++) { /* for each block in the block row */
257         cols_i = *cols + i*bs;
258         itmp  = bs*aj[ai[bn] + i];
259         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
260       }
261     }
262   }
263 
264   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
265   /* this segment is currently removed, so only entries in the upper triangle are obtained */
266 #ifdef column_search
267   v_i    = *v    + M*bs;
268   cols_i = *cols + M*bs;
269   for (i=0; i<bn; i++){ /* for each block row */
270     M = ai[i+1] - ai[i];
271     for (j=0; j<M; j++){
272       itmp = aj[ai[i] + j];    /* block column value */
273       if (itmp == bn){
274         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
275         for (k=0; k<bs; k++) {
276           *cols_i++ = i*bs+k;
277           *v_i++    = aa_i[k];
278         }
279         *ncols += bs;
280         break;
281       }
282     }
283   }
284 #endif
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
291 {
292   PetscErrorCode ierr;
293 
294   PetscFunctionBegin;
295   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
296   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
303 {
304   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
305 
306   PetscFunctionBegin;
307   a->getrow_utriangular = PETSC_TRUE;
308   PetscFunctionReturn(0);
309 }
310 #undef __FUNCT__
311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
313 {
314   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
315 
316   PetscFunctionBegin;
317   a->getrow_utriangular = PETSC_FALSE;
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
324 {
325   PetscErrorCode ierr;
326   PetscFunctionBegin;
327   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
334 {
335   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
336   PetscErrorCode    ierr;
337   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
338   const char        *name;
339   PetscViewerFormat format;
340 
341   PetscFunctionBegin;
342   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
343   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
344   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
345     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
346   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
347     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
348   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
349     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
350     for (i=0; i<a->mbs; i++) {
351       for (j=0; j<bs; j++) {
352         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
353         for (k=a->i[i]; k<a->i[i+1]; k++) {
354           for (l=0; l<bs; l++) {
355 #if defined(PETSC_USE_COMPLEX)
356             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
357               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
358                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
359             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
360               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
361                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
362             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
363               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             }
365 #else
366             if (a->a[bs2*k + l*bs + j] != 0.0) {
367               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
368             }
369 #endif
370           }
371         }
372         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
373       }
374     }
375     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
376   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
377      PetscFunctionReturn(0);
378   } else {
379     if (A->factor && bs>1){
380       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
381     }
382     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
383     for (i=0; i<a->mbs; i++) {
384       for (j=0; j<bs; j++) {
385         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
386         for (k=a->i[i]; k<a->i[i+1]; k++) {
387           for (l=0; l<bs; l++) {
388 #if defined(PETSC_USE_COMPLEX)
389             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
391                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
393               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
394                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
395             } else {
396               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
397             }
398 #else
399             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
400 #endif
401           }
402         }
403         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
404       }
405     }
406     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
407   }
408   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
414 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
415 {
416   Mat            A = (Mat) Aa;
417   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
418   PetscErrorCode ierr;
419   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
420   PetscMPIInt    rank;
421   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
422   MatScalar      *aa;
423   MPI_Comm       comm;
424   PetscViewer    viewer;
425 
426   PetscFunctionBegin;
427   /*
428     This is nasty. If this is called from an originally parallel matrix
429     then all processes call this,but only the first has the matrix so the
430     rest should return immediately.
431   */
432   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
434   if (rank) PetscFunctionReturn(0);
435 
436   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
437 
438   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
439   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
440 
441   /* loop over matrix elements drawing boxes */
442   color = PETSC_DRAW_BLUE;
443   for (i=0,row=0; i<mbs; i++,row+=bs) {
444     for (j=a->i[i]; j<a->i[i+1]; j++) {
445       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
446       x_l = a->j[j]*bs; x_r = x_l + 1.0;
447       aa = a->a + j*bs2;
448       for (k=0; k<bs; k++) {
449         for (l=0; l<bs; l++) {
450           if (PetscRealPart(*aa++) >=  0.) continue;
451           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456   color = PETSC_DRAW_CYAN;
457   for (i=0,row=0; i<mbs; i++,row+=bs) {
458     for (j=a->i[i]; j<a->i[i+1]; j++) {
459       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
460       x_l = a->j[j]*bs; x_r = x_l + 1.0;
461       aa = a->a + j*bs2;
462       for (k=0; k<bs; k++) {
463         for (l=0; l<bs; l++) {
464           if (PetscRealPart(*aa++) != 0.) continue;
465           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
466         }
467       }
468     }
469   }
470 
471   color = PETSC_DRAW_RED;
472   for (i=0,row=0; i<mbs; i++,row+=bs) {
473     for (j=a->i[i]; j<a->i[i+1]; j++) {
474       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
475       x_l = a->j[j]*bs; x_r = x_l + 1.0;
476       aa = a->a + j*bs2;
477       for (k=0; k<bs; k++) {
478         for (l=0; l<bs; l++) {
479           if (PetscRealPart(*aa++) <= 0.) continue;
480           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
490 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
491 {
492   PetscErrorCode ierr;
493   PetscReal      xl,yl,xr,yr,w,h;
494   PetscDraw      draw;
495   PetscTruth     isnull;
496 
497   PetscFunctionBegin;
498   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
499   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
500 
501   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
502   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
503   xr += w;    yr += h;  xl = -w;     yl = -h;
504   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
505   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
506   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatView_SeqSBAIJ"
512 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
513 {
514   PetscErrorCode ierr;
515   PetscTruth     iascii,isdraw;
516 
517   PetscFunctionBegin;
518   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
520   if (iascii){
521     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
522   } else if (isdraw) {
523     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
524   } else {
525     Mat B;
526     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
527     ierr = MatView(B,viewer);CHKERRQ(ierr);
528     ierr = MatDestroy(B);CHKERRQ(ierr);
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
536 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
537 {
538   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
539   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
540   PetscInt     *ai = a->i,*ailen = a->ilen;
541   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
542   MatScalar    *ap,*aa = a->a,zero = 0.0;
543 
544   PetscFunctionBegin;
545   for (k=0; k<m; k++) { /* loop over rows */
546     row  = im[k]; brow = row/bs;
547     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
548     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
549     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
550     nrow = ailen[brow];
551     for (l=0; l<n; l++) { /* loop over columns */
552       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
553       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
554       col  = in[l] ;
555       bcol = col/bs;
556       cidx = col%bs;
557       ridx = row%bs;
558       high = nrow;
559       low  = 0; /* assume unsorted */
560       while (high-low > 5) {
561         t = (low+high)/2;
562         if (rp[t] > bcol) high = t;
563         else             low  = t;
564       }
565       for (i=low; i<high; i++) {
566         if (rp[i] > bcol) break;
567         if (rp[i] == bcol) {
568           *v++ = ap[bs2*i+bs*cidx+ridx];
569           goto finished;
570         }
571       }
572       *v++ = zero;
573        finished:;
574     }
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
582 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
583 {
584   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
585   PetscErrorCode  ierr;
586   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
587   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
588   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
589   PetscTruth      roworiented=a->roworiented;
590   const MatScalar *value = v;
591   MatScalar       *ap,*aa = a->a,*bap;
592 
593   PetscFunctionBegin;
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (k=0; k<m; k++) { /* loop over added rows */
600     row  = im[k];
601     if (row < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
604 #endif
605     rp   = aj + ai[row];
606     ap   = aa + bs2*ai[row];
607     rmax = imax[row];
608     nrow = ailen[row];
609     low  = 0;
610     high = nrow;
611     for (l=0; l<n; l++) { /* loop over added columns */
612       if (in[l] < 0) continue;
613       col = in[l];
614 #if defined(PETSC_USE_DEBUG)
615       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
616 #endif
617       if (col < row) continue; /* ignore lower triangular block */
618       if (roworiented) {
619         value = v + k*(stepval+bs)*bs + l*bs;
620       } else {
621         value = v + l*(stepval+bs)*bs + k*bs;
622       }
623       if (col <= lastcol) low = 0; else high = nrow;
624       lastcol = col;
625       while (high-low > 7) {
626         t = (low+high)/2;
627         if (rp[t] > col) high = t;
628         else             low  = t;
629       }
630       for (i=low; i<high; i++) {
631         if (rp[i] > col) break;
632         if (rp[i] == col) {
633           bap  = ap +  bs2*i;
634           if (roworiented) {
635             if (is == ADD_VALUES) {
636               for (ii=0; ii<bs; ii++,value+=stepval) {
637                 for (jj=ii; jj<bs2; jj+=bs) {
638                   bap[jj] += *value++;
639                 }
640               }
641             } else {
642               for (ii=0; ii<bs; ii++,value+=stepval) {
643                 for (jj=ii; jj<bs2; jj+=bs) {
644                   bap[jj] = *value++;
645                 }
646                }
647             }
648           } else {
649             if (is == ADD_VALUES) {
650               for (ii=0; ii<bs; ii++,value+=stepval) {
651                 for (jj=0; jj<bs; jj++) {
652                   *bap++ += *value++;
653                 }
654               }
655             } else {
656               for (ii=0; ii<bs; ii++,value+=stepval) {
657                 for (jj=0; jj<bs; jj++) {
658                   *bap++  = *value++;
659                 }
660               }
661             }
662           }
663           goto noinsert2;
664         }
665       }
666       if (nonew == 1) goto noinsert2;
667       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
668       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
669       N = nrow++ - 1; high++;
670       /* shift up all the later entries in this row */
671       for (ii=N; ii>=i; ii--) {
672         rp[ii+1] = rp[ii];
673         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
674       }
675       if (N >= i) {
676         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
677       }
678       rp[i] = col;
679       bap   = ap +  bs2*i;
680       if (roworiented) {
681         for (ii=0; ii<bs; ii++,value+=stepval) {
682           for (jj=ii; jj<bs2; jj+=bs) {
683             bap[jj] = *value++;
684           }
685         }
686       } else {
687         for (ii=0; ii<bs; ii++,value+=stepval) {
688           for (jj=0; jj<bs; jj++) {
689             *bap++  = *value++;
690           }
691         }
692        }
693     noinsert2:;
694       low = i;
695     }
696     ailen[row] = nrow;
697   }
698    PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
703 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
704 {
705   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
706   PetscErrorCode ierr;
707   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
708   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
709   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
710   MatScalar      *aa = a->a,*ap;
711 
712   PetscFunctionBegin;
713   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
714 
715   if (m) rmax = ailen[0];
716   for (i=1; i<mbs; i++) {
717     /* move each row back by the amount of empty slots (fshift) before it*/
718     fshift += imax[i-1] - ailen[i-1];
719      rmax   = PetscMax(rmax,ailen[i]);
720      if (fshift) {
721        ip = aj + ai[i]; ap = aa + bs2*ai[i];
722        N = ailen[i];
723        for (j=0; j<N; j++) {
724          ip[j-fshift] = ip[j];
725          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726        }
727      }
728      ai[i] = ai[i-1] + ailen[i-1];
729   }
730   if (mbs) {
731     fshift += imax[mbs-1] - ailen[mbs-1];
732      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
733   }
734   /* reset ilen and imax for each row */
735   for (i=0; i<mbs; i++) {
736     ailen[i] = imax[i] = ai[i+1] - ai[i];
737   }
738   a->nz = ai[mbs];
739 
740   /* diagonals may have moved, reset it */
741   if (a->diag) {
742     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
743   }
744   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
745   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
746   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
747   a->reallocs          = 0;
748   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
749   PetscFunctionReturn(0);
750 }
751 
752 /*
753    This function returns an array of flags which indicate the locations of contiguous
754    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
755    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
756    Assume: sizes should be long enough to hold all the values.
757 */
758 #undef __FUNCT__
759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
761 {
762   PetscInt   i,j,k,row;
763   PetscTruth flg;
764 
765   PetscFunctionBegin;
766    for (i=0,j=0; i<n; j++) {
767      row = idx[i];
768      if (row%bs!=0) { /* Not the begining of a block */
769        sizes[j] = 1;
770        i++;
771      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
772        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
773        i++;
774      } else { /* Begining of the block, so check if the complete block exists */
775        flg = PETSC_TRUE;
776        for (k=1; k<bs; k++) {
777          if (row+k != idx[i+k]) { /* break in the block */
778            flg = PETSC_FALSE;
779            break;
780          }
781        }
782        if (flg) { /* No break in the bs */
783          sizes[j] = bs;
784          i+= bs;
785        } else {
786          sizes[j] = 1;
787          i++;
788        }
789      }
790    }
791    *bs_max = j;
792    PetscFunctionReturn(0);
793 }
794 
795 
796 /* Only add/insert a(i,j) with i<=j (blocks).
797    Any a(i,j) with i>j input by user is ingored.
798 */
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
802 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
803 {
804   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
805   PetscErrorCode ierr;
806   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
807   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
808   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
809   PetscInt       ridx,cidx,bs2=a->bs2;
810   MatScalar      *ap,value,*aa=a->a,*bap;
811 
812   PetscFunctionBegin;
813   for (k=0; k<m; k++) { /* loop over added rows */
814     row  = im[k];       /* row number */
815     brow = row/bs;      /* block row number */
816     if (row < 0) continue;
817 #if defined(PETSC_USE_DEBUG)
818     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
819 #endif
820     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
821     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
822     rmax = imax[brow];  /* maximum space allocated for this row */
823     nrow = ailen[brow]; /* actual length of this row */
824     low  = 0;
825 
826     for (l=0; l<n; l++) { /* loop over added columns */
827       if (in[l] < 0) continue;
828 #if defined(PETSC_USE_DEBUG)
829       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
830 #endif
831       col = in[l];
832       bcol = col/bs;              /* block col number */
833 
834       if (brow > bcol) {
835         if (a->ignore_ltriangular){
836           continue; /* ignore lower triangular values */
837         } else {
838           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
839         }
840       }
841 
842       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
843       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
844         /* element value a(k,l) */
845         if (roworiented) {
846           value = v[l + k*n];
847         } else {
848           value = v[k + l*m];
849         }
850 
851         /* move pointer bap to a(k,l) quickly and add/insert value */
852         if (col <= lastcol) low = 0; high = nrow;
853         lastcol = col;
854         while (high-low > 7) {
855           t = (low+high)/2;
856           if (rp[t] > bcol) high = t;
857           else              low  = t;
858         }
859         for (i=low; i<high; i++) {
860           if (rp[i] > bcol) break;
861           if (rp[i] == bcol) {
862             bap  = ap +  bs2*i + bs*cidx + ridx;
863             if (is == ADD_VALUES) *bap += value;
864             else                  *bap  = value;
865             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
866             if (brow == bcol && ridx < cidx){
867               bap  = ap +  bs2*i + bs*ridx + cidx;
868               if (is == ADD_VALUES) *bap += value;
869               else                  *bap  = value;
870             }
871             goto noinsert1;
872           }
873         }
874 
875         if (nonew == 1) goto noinsert1;
876         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
877         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
878 
879         N = nrow++ - 1; high++;
880         /* shift up all the later entries in this row */
881         for (ii=N; ii>=i; ii--) {
882           rp[ii+1] = rp[ii];
883           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
884         }
885         if (N>=i) {
886           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         }
888         rp[i]                      = bcol;
889         ap[bs2*i + bs*cidx + ridx] = value;
890       noinsert1:;
891         low = i;
892       }
893     }   /* end of loop over added columns */
894     ailen[brow] = nrow;
895   }   /* end of loop over added rows */
896   PetscFunctionReturn(0);
897 }
898 
899 #undef __FUNCT__
900 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
901 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
902 {
903   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
904   Mat            outA;
905   PetscErrorCode ierr;
906   PetscTruth     row_identity;
907 
908   PetscFunctionBegin;
909   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
910   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
911   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
912   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
913 
914   outA        = inA;
915   inA->factor = FACTOR_CHOLESKY;
916 
917   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
918   /*
919     Blocksize < 8 have a special faster factorization/solver
920     for ICC(0) factorization with natural ordering
921   */
922   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
923   case 1:
924     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
925     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
926     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
927     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
928     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
929     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
930     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
931     break;
932   case 2:
933     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
934     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
935     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
936     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
937     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
938     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
939     break;
940   case 3:
941      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
942      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
943      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
944      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
945      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
946      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
947      break;
948   case 4:
949     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
950     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
951     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
952     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
953     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
954     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
955     break;
956   case 5:
957     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
958     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
959     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
960     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
961     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
962     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
963     break;
964   case 6:
965     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
966     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
967     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
968     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
969     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
970     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
971     break;
972   case 7:
973     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
974     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
975     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
976     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
977     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
978     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
979     break;
980   default:
981     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
982     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
983     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
984     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
985     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
986     break;
987   }
988 
989   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
990   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
991   a->row = row;
992   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
993   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
994   a->col = row;
995 
996   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
997   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
998   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
999 
1000   if (!a->solve_work) {
1001     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1002     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1003   }
1004 
1005   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 EXTERN_C_BEGIN
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1012 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1013 {
1014   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1015   PetscInt     i,nz,n;
1016 
1017   PetscFunctionBegin;
1018   nz = baij->maxnz;
1019   n  = mat->cmap.n;
1020   for (i=0; i<nz; i++) {
1021     baij->j[i] = indices[i];
1022   }
1023    baij->nz = nz;
1024    for (i=0; i<n; i++) {
1025      baij->ilen[i] = baij->imax[i];
1026    }
1027    PetscFunctionReturn(0);
1028 }
1029 EXTERN_C_END
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1033 /*@
1034   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1035   in the matrix.
1036 
1037   Input Parameters:
1038   +  mat     - the SeqSBAIJ matrix
1039   -  indices - the column indices
1040 
1041   Level: advanced
1042 
1043   Notes:
1044   This can be called if you have precomputed the nonzero structure of the
1045   matrix and want to provide it to the matrix object to improve the performance
1046   of the MatSetValues() operation.
1047 
1048   You MUST have set the correct numbers of nonzeros per row in the call to
1049   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1050 
1051   MUST be called before any calls to MatSetValues()
1052 
1053   .seealso: MatCreateSeqSBAIJ
1054 @*/
1055 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1056 {
1057   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1058 
1059   PetscFunctionBegin;
1060   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1061   PetscValidPointer(indices,2);
1062   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1063   if (f) {
1064     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1073 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1074 {
1075   PetscErrorCode ierr;
1076 
1077   PetscFunctionBegin;
1078   /* If the two matrices have the same copy implementation, use fast copy. */
1079   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1080     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1081     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1082 
1083     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1084       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1085     }
1086     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1087   } else {
1088     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1089     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1090     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1097 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1098 {
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1108 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1109 {
1110   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1111   PetscFunctionBegin;
1112   *array = a->a;
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1118 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1119 {
1120   PetscFunctionBegin;
1121   PetscFunctionReturn(0);
1122  }
1123 
1124 #include "petscblaslapack.h"
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1127 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1128 {
1129   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1130   PetscErrorCode ierr;
1131   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1132   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1133 
1134   PetscFunctionBegin;
1135   if (str == SAME_NONZERO_PATTERN) {
1136     PetscScalar alpha = a;
1137     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1138   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1139     if (y->xtoy && y->XtoY != X) {
1140       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1141       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1142     }
1143     if (!y->xtoy) { /* get xtoy */
1144       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1145       y->XtoY = X;
1146     }
1147     bs2 = bs*bs;
1148     for (i=0; i<x->nz; i++) {
1149       j = 0;
1150       while (j < bs2){
1151         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1152         j++;
1153       }
1154     }
1155     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1156   } else {
1157     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1158     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1159     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1166 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1167 {
1168   PetscFunctionBegin;
1169   *flg = PETSC_TRUE;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1175 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1176 {
1177    PetscFunctionBegin;
1178    *flg = PETSC_TRUE;
1179    PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1184 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1185  {
1186    PetscFunctionBegin;
1187    *flg = PETSC_FALSE;
1188    PetscFunctionReturn(0);
1189  }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1193 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1194 {
1195   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1196   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1197   PetscScalar    *aa = a->a;
1198 
1199   PetscFunctionBegin;
1200   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1206 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1207 {
1208   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1209   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1210   PetscScalar    *aa = a->a;
1211 
1212   PetscFunctionBegin;
1213   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /* -------------------------------------------------------------------*/
1218 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1219        MatGetRow_SeqSBAIJ,
1220        MatRestoreRow_SeqSBAIJ,
1221        MatMult_SeqSBAIJ_N,
1222 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1223        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1224        MatMultAdd_SeqSBAIJ_N,
1225        MatSolve_SeqSBAIJ_N,
1226        0,
1227        0,
1228 /*10*/ 0,
1229        0,
1230        MatCholeskyFactor_SeqSBAIJ,
1231        MatRelax_SeqSBAIJ,
1232        MatTranspose_SeqSBAIJ,
1233 /*15*/ MatGetInfo_SeqSBAIJ,
1234        MatEqual_SeqSBAIJ,
1235        MatGetDiagonal_SeqSBAIJ,
1236        MatDiagonalScale_SeqSBAIJ,
1237        MatNorm_SeqSBAIJ,
1238 /*20*/ 0,
1239        MatAssemblyEnd_SeqSBAIJ,
1240        0,
1241        MatSetOption_SeqSBAIJ,
1242        MatZeroEntries_SeqSBAIJ,
1243 /*25*/ 0,
1244        0,
1245        0,
1246        MatCholeskyFactorSymbolic_SeqSBAIJ,
1247        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1248 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1249        0,
1250        MatICCFactorSymbolic_SeqSBAIJ,
1251        MatGetArray_SeqSBAIJ,
1252        MatRestoreArray_SeqSBAIJ,
1253 /*35*/ MatDuplicate_SeqSBAIJ,
1254        MatForwardSolve_SeqSBAIJ_N,
1255        MatBackwardSolve_SeqSBAIJ_N,
1256        0,
1257        MatICCFactor_SeqSBAIJ,
1258 /*40*/ MatAXPY_SeqSBAIJ,
1259        MatGetSubMatrices_SeqSBAIJ,
1260        MatIncreaseOverlap_SeqSBAIJ,
1261        MatGetValues_SeqSBAIJ,
1262        MatCopy_SeqSBAIJ,
1263 /*45*/ 0,
1264        MatScale_SeqSBAIJ,
1265        0,
1266        0,
1267        0,
1268 /*50*/ 0,
1269        MatGetRowIJ_SeqSBAIJ,
1270        MatRestoreRowIJ_SeqSBAIJ,
1271        0,
1272        0,
1273 /*55*/ 0,
1274        0,
1275        0,
1276        0,
1277        MatSetValuesBlocked_SeqSBAIJ,
1278 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1279        0,
1280        0,
1281        0,
1282        0,
1283 /*65*/ 0,
1284        0,
1285        0,
1286        0,
1287        0,
1288 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1289        0,
1290        0,
1291        0,
1292        0,
1293 /*75*/ 0,
1294        0,
1295        0,
1296        0,
1297        0,
1298 /*80*/ 0,
1299        0,
1300        0,
1301 #if !defined(PETSC_USE_COMPLEX)
1302        MatGetInertia_SeqSBAIJ,
1303 #else
1304        0,
1305 #endif
1306        MatLoad_SeqSBAIJ,
1307 /*85*/ MatIsSymmetric_SeqSBAIJ,
1308        MatIsHermitian_SeqSBAIJ,
1309        MatIsStructurallySymmetric_SeqSBAIJ,
1310        0,
1311        0,
1312 /*90*/ 0,
1313        0,
1314        0,
1315        0,
1316        0,
1317 /*95*/ 0,
1318        0,
1319        0,
1320        0,
1321        0,
1322 /*100*/0,
1323        0,
1324        0,
1325        0,
1326        0,
1327 /*105*/0,
1328        MatRealPart_SeqSBAIJ,
1329        MatImaginaryPart_SeqSBAIJ,
1330        MatGetRowUpperTriangular_SeqSBAIJ,
1331        MatRestoreRowUpperTriangular_SeqSBAIJ
1332 };
1333 
1334 EXTERN_C_BEGIN
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1337 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1338 {
1339   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1340   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   if (aij->nonew != 1) {
1345     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1346   }
1347 
1348   /* allocate space for values if not already there */
1349   if (!aij->saved_values) {
1350     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1351   }
1352 
1353   /* copy values over */
1354   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 EXTERN_C_END
1358 
1359 EXTERN_C_BEGIN
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1362 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1363 {
1364   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1365   PetscErrorCode ierr;
1366   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1367 
1368   PetscFunctionBegin;
1369   if (aij->nonew != 1) {
1370     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1371   }
1372   if (!aij->saved_values) {
1373     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1374   }
1375 
1376   /* copy values over */
1377   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 EXTERN_C_END
1381 
1382 EXTERN_C_BEGIN
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1385 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1386 {
1387   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1388   PetscErrorCode ierr;
1389   PetscInt       i,mbs,bs2;
1390   PetscTruth     skipallocation = PETSC_FALSE,flg;
1391 
1392   PetscFunctionBegin;
1393   B->preallocated = PETSC_TRUE;
1394   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1395   B->rmap.bs = B->cmap.bs = bs;
1396   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1397   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1398 
1399   mbs  = B->rmap.N/bs;
1400   bs2  = bs*bs;
1401 
1402   if (mbs*bs != B->rmap.N) {
1403     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1404   }
1405 
1406   if (nz == MAT_SKIP_ALLOCATION) {
1407     skipallocation = PETSC_TRUE;
1408     nz             = 0;
1409   }
1410 
1411   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1412   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1413   if (nnz) {
1414     for (i=0; i<mbs; i++) {
1415       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1416       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1417     }
1418   }
1419 
1420   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1421   if (!flg) {
1422     switch (bs) {
1423     case 1:
1424       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1425       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1426       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1427       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1428       B->ops->mult             = MatMult_SeqSBAIJ_1;
1429       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1430       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1431       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1432       break;
1433     case 2:
1434       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1435       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1436       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1437       B->ops->mult             = MatMult_SeqSBAIJ_2;
1438       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1439       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1440       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1441       break;
1442     case 3:
1443       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1444       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1445       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1446       B->ops->mult             = MatMult_SeqSBAIJ_3;
1447       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1448       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1449       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1450       break;
1451     case 4:
1452       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1453       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1454       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1455       B->ops->mult             = MatMult_SeqSBAIJ_4;
1456       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1457       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1458       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1459       break;
1460     case 5:
1461       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1462       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1463       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1464       B->ops->mult             = MatMult_SeqSBAIJ_5;
1465       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1466       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1467       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1468       break;
1469     case 6:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1471       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1472       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1473       B->ops->mult             = MatMult_SeqSBAIJ_6;
1474       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1475       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1476       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1477       break;
1478     case 7:
1479       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1480       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1481       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1482       B->ops->mult             = MatMult_SeqSBAIJ_7;
1483       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1484       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1485       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1486       break;
1487     }
1488   }
1489 
1490   b->mbs = mbs;
1491   b->nbs = mbs;
1492   if (!skipallocation) {
1493     /* b->ilen will count nonzeros in each block row so far. */
1494     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1495     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1496     if (!nnz) {
1497       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1498       else if (nz <= 0)        nz = 1;
1499       for (i=0; i<mbs; i++) {
1500         b->imax[i] = nz;
1501       }
1502       nz = nz*mbs; /* total nz */
1503     } else {
1504       nz = 0;
1505       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1506     }
1507     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1508 
1509     /* allocate the matrix space */
1510     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1511     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1512     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1513     b->singlemalloc = PETSC_TRUE;
1514 
1515     /* pointer to beginning of each row */
1516     b->i[0] = 0;
1517     for (i=1; i<mbs+1; i++) {
1518       b->i[i] = b->i[i-1] + b->imax[i-1];
1519     }
1520     b->free_a     = PETSC_TRUE;
1521     b->free_ij    = PETSC_TRUE;
1522   } else {
1523     b->free_a     = PETSC_FALSE;
1524     b->free_ij    = PETSC_FALSE;
1525   }
1526 
1527   B->rmap.bs               = bs;
1528   b->bs2              = bs2;
1529   b->nz             = 0;
1530   b->maxnz          = nz*bs2;
1531 
1532   b->inew             = 0;
1533   b->jnew             = 0;
1534   b->anew             = 0;
1535   b->a2anew           = 0;
1536   b->permute          = PETSC_FALSE;
1537   PetscFunctionReturn(0);
1538 }
1539 EXTERN_C_END
1540 
1541 EXTERN_C_BEGIN
1542 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1544 EXTERN_C_END
1545 
1546 /*MC
1547   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1548   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1549 
1550   Options Database Keys:
1551   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1552 
1553   Level: beginner
1554 
1555   .seealso: MatCreateSeqSBAIJ
1556 M*/
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1562 {
1563   Mat_SeqSBAIJ   *b;
1564   PetscErrorCode ierr;
1565   PetscMPIInt    size;
1566   PetscTruth     flg;
1567 
1568   PetscFunctionBegin;
1569   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1570   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1571 
1572   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1573   B->data = (void*)b;
1574   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1575   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1576   B->ops->view        = MatView_SeqSBAIJ;
1577   B->factor           = 0;
1578   B->mapping          = 0;
1579   b->row              = 0;
1580   b->icol             = 0;
1581   b->reallocs         = 0;
1582   b->saved_values     = 0;
1583 
1584 
1585   b->sorted           = PETSC_FALSE;
1586   b->roworiented      = PETSC_TRUE;
1587   b->nonew            = 0;
1588   b->diag             = 0;
1589   b->solve_work       = 0;
1590   b->mult_work        = 0;
1591   B->spptr            = 0;
1592   b->keepzeroedrows   = PETSC_FALSE;
1593   b->xtoy             = 0;
1594   b->XtoY             = 0;
1595 
1596   b->inew             = 0;
1597   b->jnew             = 0;
1598   b->anew             = 0;
1599   b->a2anew           = 0;
1600   b->permute          = PETSC_FALSE;
1601 
1602   b->ignore_ltriangular = PETSC_FALSE;
1603   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1604   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1605 
1606   b->getrow_utriangular = PETSC_FALSE;
1607   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1608   if (flg) b->getrow_utriangular = PETSC_TRUE;
1609 
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1611                                      "MatStoreValues_SeqSBAIJ",
1612                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1614                                      "MatRetrieveValues_SeqSBAIJ",
1615                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1617                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1618                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1620                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1621                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1623                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1624                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1626                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1627                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1628 
1629   B->symmetric                  = PETSC_TRUE;
1630   B->structurally_symmetric     = PETSC_TRUE;
1631   B->symmetric_set              = PETSC_TRUE;
1632   B->structurally_symmetric_set = PETSC_TRUE;
1633   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1634   PetscFunctionReturn(0);
1635 }
1636 EXTERN_C_END
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1640 /*@C
1641    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1642    compressed row) format.  For good matrix assembly performance the
1643    user should preallocate the matrix storage by setting the parameter nz
1644    (or the array nnz).  By setting these parameters accurately, performance
1645    during matrix assembly can be increased by more than a factor of 50.
1646 
1647    Collective on Mat
1648 
1649    Input Parameters:
1650 +  A - the symmetric matrix
1651 .  bs - size of block
1652 .  nz - number of block nonzeros per block row (same for all rows)
1653 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1654          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1655 
1656    Options Database Keys:
1657 .   -mat_no_unroll - uses code that does not unroll the loops in the
1658                      block calculations (much slower)
1659 .    -mat_block_size - size of the blocks to use
1660 
1661    Level: intermediate
1662 
1663    Notes:
1664    Specify the preallocated storage with either nz or nnz (not both).
1665    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1666    allocation.  For additional details, see the users manual chapter on
1667    matrices.
1668 
1669    If the nnz parameter is given then the nz parameter is ignored
1670 
1671 
1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1673 @*/
1674 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1675 {
1676   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1680   if (f) {
1681     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreateSeqSBAIJ"
1688 /*@C
1689    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1690    compressed row) format.  For good matrix assembly performance the
1691    user should preallocate the matrix storage by setting the parameter nz
1692    (or the array nnz).  By setting these parameters accurately, performance
1693    during matrix assembly can be increased by more than a factor of 50.
1694 
1695    Collective on MPI_Comm
1696 
1697    Input Parameters:
1698 +  comm - MPI communicator, set to PETSC_COMM_SELF
1699 .  bs - size of block
1700 .  m - number of rows, or number of columns
1701 .  nz - number of block nonzeros per block row (same for all rows)
1702 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1703          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1704 
1705    Output Parameter:
1706 .  A - the symmetric matrix
1707 
1708    Options Database Keys:
1709 .   -mat_no_unroll - uses code that does not unroll the loops in the
1710                      block calculations (much slower)
1711 .    -mat_block_size - size of the blocks to use
1712 
1713    Level: intermediate
1714 
1715    Notes:
1716    The number of rows and columns must be divisible by blocksize.
1717    This matrix type does not support complex Hermitian operation.
1718 
1719    Specify the preallocated storage with either nz or nnz (not both).
1720    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1721    allocation.  For additional details, see the users manual chapter on
1722    matrices.
1723 
1724    If the nnz parameter is given then the nz parameter is ignored
1725 
1726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1727 @*/
1728 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1729 {
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1734   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1735   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1736   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1742 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1743 {
1744   Mat            C;
1745   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1746   PetscErrorCode ierr;
1747   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1748 
1749   PetscFunctionBegin;
1750   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1751 
1752   *B = 0;
1753   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1754   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1755   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1756   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1757   c    = (Mat_SeqSBAIJ*)C->data;
1758 
1759   C->preallocated   = PETSC_TRUE;
1760   C->factor         = A->factor;
1761   c->row            = 0;
1762   c->icol           = 0;
1763   c->saved_values   = 0;
1764   c->keepzeroedrows = a->keepzeroedrows;
1765   C->assembled      = PETSC_TRUE;
1766 
1767   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1768   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1769   c->bs2  = a->bs2;
1770   c->mbs  = a->mbs;
1771   c->nbs  = a->nbs;
1772 
1773   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1774   for (i=0; i<mbs; i++) {
1775     c->imax[i] = a->imax[i];
1776     c->ilen[i] = a->ilen[i];
1777   }
1778 
1779   /* allocate the matrix space */
1780   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1781   c->singlemalloc = PETSC_TRUE;
1782   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1783   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1784   if (mbs > 0) {
1785     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1786     if (cpvalues == MAT_COPY_VALUES) {
1787       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     } else {
1789       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1790     }
1791   }
1792 
1793   c->sorted      = a->sorted;
1794   c->roworiented = a->roworiented;
1795   c->nonew       = a->nonew;
1796 
1797   if (a->diag) {
1798     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1799     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1800     for (i=0; i<mbs; i++) {
1801       c->diag[i] = a->diag[i];
1802     }
1803   } else c->diag  = 0;
1804   c->nz           = a->nz;
1805   c->maxnz        = a->maxnz;
1806   c->solve_work   = 0;
1807   c->mult_work    = 0;
1808   c->free_a       = PETSC_TRUE;
1809   c->free_ij      = PETSC_TRUE;
1810   *B = C;
1811   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1817 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1818 {
1819   Mat_SeqSBAIJ   *a;
1820   Mat            B;
1821   PetscErrorCode ierr;
1822   int            fd;
1823   PetscMPIInt    size;
1824   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1825   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1826   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1827   PetscInt       *masked,nmask,tmp,bs2,ishift;
1828   PetscScalar    *aa;
1829   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1830 
1831   PetscFunctionBegin;
1832   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1833   bs2  = bs*bs;
1834 
1835   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1836   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1837   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1838   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1839   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1840   M = header[1]; N = header[2]; nz = header[3];
1841 
1842   if (header[3] < 0) {
1843     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1844   }
1845 
1846   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1847 
1848   /*
1849      This code adds extra rows to make sure the number of rows is
1850     divisible by the blocksize
1851   */
1852   mbs        = M/bs;
1853   extra_rows = bs - M + bs*(mbs);
1854   if (extra_rows == bs) extra_rows = 0;
1855   else                  mbs++;
1856   if (extra_rows) {
1857     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1858   }
1859 
1860   /* read in row lengths */
1861   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1862   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1863   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1864 
1865   /* read in column indices */
1866   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1867   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1868   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1869 
1870   /* loop over row lengths determining block row lengths */
1871   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1872   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1873   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1874   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1875   masked   = mask + mbs;
1876   rowcount = 0; nzcount = 0;
1877   for (i=0; i<mbs; i++) {
1878     nmask = 0;
1879     for (j=0; j<bs; j++) {
1880       kmax = rowlengths[rowcount];
1881       for (k=0; k<kmax; k++) {
1882         tmp = jj[nzcount++]/bs;   /* block col. index */
1883         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1884       }
1885       rowcount++;
1886     }
1887     s_browlengths[i] += nmask;
1888 
1889     /* zero out the mask elements we set */
1890     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1891   }
1892 
1893   /* create our matrix */
1894   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1895   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1896   ierr = MatSetType(B,type);CHKERRQ(ierr);
1897   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1898   a = (Mat_SeqSBAIJ*)B->data;
1899 
1900   /* set matrix "i" values */
1901   a->i[0] = 0;
1902   for (i=1; i<= mbs; i++) {
1903     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1904     a->ilen[i-1] = s_browlengths[i-1];
1905   }
1906   a->nz = a->i[mbs];
1907 
1908   /* read in nonzero values */
1909   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1910   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1911   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1912 
1913   /* set "a" and "j" values into matrix */
1914   nzcount = 0; jcount = 0;
1915   for (i=0; i<mbs; i++) {
1916     nzcountb = nzcount;
1917     nmask    = 0;
1918     for (j=0; j<bs; j++) {
1919       kmax = rowlengths[i*bs+j];
1920       for (k=0; k<kmax; k++) {
1921         tmp = jj[nzcount++]/bs; /* block col. index */
1922         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1923       }
1924     }
1925     /* sort the masked values */
1926     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1927 
1928     /* set "j" values into matrix */
1929     maskcount = 1;
1930     for (j=0; j<nmask; j++) {
1931       a->j[jcount++]  = masked[j];
1932       mask[masked[j]] = maskcount++;
1933     }
1934 
1935     /* set "a" values into matrix */
1936     ishift = bs2*a->i[i];
1937     for (j=0; j<bs; j++) {
1938       kmax = rowlengths[i*bs+j];
1939       for (k=0; k<kmax; k++) {
1940         tmp       = jj[nzcountb]/bs ; /* block col. index */
1941         if (tmp >= i){
1942           block     = mask[tmp] - 1;
1943           point     = jj[nzcountb] - bs*tmp;
1944           idx       = ishift + bs2*block + j + bs*point;
1945           a->a[idx] = aa[nzcountb];
1946         }
1947         nzcountb++;
1948       }
1949     }
1950     /* zero out the mask elements we set */
1951     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1952   }
1953   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1954 
1955   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1956   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1957   ierr = PetscFree(aa);CHKERRQ(ierr);
1958   ierr = PetscFree(jj);CHKERRQ(ierr);
1959   ierr = PetscFree(mask);CHKERRQ(ierr);
1960 
1961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatView_Private(B);CHKERRQ(ierr);
1964   *A = B;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1970 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1971 {
1972   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1973   MatScalar      *aa=a->a,*v,*v1;
1974   PetscScalar    *x,*b,*t,sum,d;
1975   PetscErrorCode ierr;
1976   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1977   PetscInt       nz,nz1,*vj,*vj1,i;
1978 
1979   PetscFunctionBegin;
1980   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1981 
1982   its = its*lits;
1983   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1984 
1985   if (bs > 1)
1986     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1987 
1988   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1989   if (xx != bb) {
1990     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1991   } else {
1992     b = x;
1993   }
1994 
1995   if (!a->relax_work) {
1996     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
1997   }
1998   t = a->relax_work;
1999 
2000   if (flag & SOR_ZERO_INITIAL_GUESS) {
2001     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2002       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2003 
2004       for (i=0; i<m; i++){
2005         d  = *(aa + ai[i]);  /* diag[i] */
2006         v  = aa + ai[i] + 1;
2007         vj = aj + ai[i] + 1;
2008         nz = ai[i+1] - ai[i] - 1;
2009         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2010         x[i] = omega*t[i]/d;
2011         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2012       }
2013     }
2014 
2015     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2016       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2017         t = b;
2018       }
2019 
2020       for (i=m-1; i>=0; i--){
2021         d  = *(aa + ai[i]);
2022         v  = aa + ai[i] + 1;
2023         vj = aj + ai[i] + 1;
2024         nz = ai[i+1] - ai[i] - 1;
2025         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2026         sum = t[i];
2027         while (nz--) sum -= x[*vj++]*(*v++);
2028         x[i] =   (1-omega)*x[i] + omega*sum/d;
2029       }
2030       t = a->relax_work;
2031     }
2032     its--;
2033   }
2034 
2035   while (its--) {
2036     /*
2037        forward sweep:
2038        for i=0,...,m-1:
2039          sum[i] = (b[i] - U(i,:)x )/d[i];
2040          x[i]   = (1-omega)x[i] + omega*sum[i];
2041          b      = b - x[i]*U^T(i,:);
2042 
2043     */
2044     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2045       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2046 
2047       for (i=0; i<m; i++){
2048         d  = *(aa + ai[i]);  /* diag[i] */
2049         v  = aa + ai[i] + 1; v1=v;
2050         vj = aj + ai[i] + 1; vj1=vj;
2051         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2052         sum = t[i];
2053         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2054         while (nz1--) sum -= (*v1++)*x[*vj1++];
2055         x[i] = (1-omega)*x[i] + omega*sum/d;
2056         while (nz--) t[*vj++] -= x[i]*(*v++);
2057       }
2058     }
2059 
2060     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2061       /*
2062        backward sweep:
2063        b = b - x[i]*U^T(i,:), i=0,...,n-2
2064        for i=m-1,...,0:
2065          sum[i] = (b[i] - U(i,:)x )/d[i];
2066          x[i]   = (1-omega)x[i] + omega*sum[i];
2067       */
2068       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2069       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2070 
2071       for (i=0; i<m-1; i++){  /* update rhs */
2072         v  = aa + ai[i] + 1;
2073         vj = aj + ai[i] + 1;
2074         nz = ai[i+1] - ai[i] - 1;
2075         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2076         while (nz--) t[*vj++] -= x[i]*(*v++);
2077       }
2078       for (i=m-1; i>=0; i--){
2079         d  = *(aa + ai[i]);
2080         v  = aa + ai[i] + 1;
2081         vj = aj + ai[i] + 1;
2082         nz = ai[i+1] - ai[i] - 1;
2083         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2084         sum = t[i];
2085         while (nz--) sum -= x[*vj++]*(*v++);
2086         x[i] =   (1-omega)*x[i] + omega*sum/d;
2087       }
2088     }
2089   }
2090 
2091   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2092   if (bb != xx) {
2093     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2100 /*@
2101      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2102               (upper triangular entries in CSR format) provided by the user.
2103 
2104      Collective on MPI_Comm
2105 
2106    Input Parameters:
2107 +  comm - must be an MPI communicator of size 1
2108 .  bs - size of block
2109 .  m - number of rows
2110 .  n - number of columns
2111 .  i - row indices
2112 .  j - column indices
2113 -  a - matrix values
2114 
2115    Output Parameter:
2116 .  mat - the matrix
2117 
2118    Level: intermediate
2119 
2120    Notes:
2121        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2122     once the matrix is destroyed
2123 
2124        You cannot set new nonzero locations into this matrix, that will generate an error.
2125 
2126        The i and j indices are 0 based
2127 
2128 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2129 
2130 @*/
2131 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2132 {
2133   PetscErrorCode ierr;
2134   PetscInt       ii;
2135   Mat_SeqSBAIJ   *sbaij;
2136 
2137   PetscFunctionBegin;
2138   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2139   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2140 
2141   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2142   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2143   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2144   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2145   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2146   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2147 
2148   sbaij->i = i;
2149   sbaij->j = j;
2150   sbaij->a = a;
2151   sbaij->singlemalloc = PETSC_FALSE;
2152   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2153   sbaij->free_a       = PETSC_FALSE;
2154   sbaij->free_ij      = PETSC_FALSE;
2155 
2156   for (ii=0; ii<m; ii++) {
2157     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2158 #if defined(PETSC_USE_DEBUG)
2159     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2160 #endif
2161   }
2162 #if defined(PETSC_USE_DEBUG)
2163   for (ii=0; ii<sbaij->i[m]; ii++) {
2164     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2165     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2166   }
2167 #endif
2168 
2169   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2170   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 
2175 
2176 
2177 
2178