xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 8608aa04b2a6dc8d52d2067b7ceb4ff36b3a4d0b)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i;
40 
41   PetscFunctionBegin;
42   if (!a->diag) {
43     ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
44   }
45   for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60   if (!blockcompressed) {
61     /* malloc & create the natural set of indices */
62     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
63     for (i=0; i<n+1; i++) {
64       for (j=0; j<bs; j++) {
65         *ia[i*bs+j] = a->i[i]*bs+j+oshift;
66       }
67     }
68     for (i=0; i<nz; i++) {
69       for (j=0; j<bs; j++) {
70         *ja[i*bs+j] = a->j[i]*bs+j+oshift;
71       }
72     }
73   } else { /* blockcompressed */
74     if (oshift == 1) {
75       /* temporarily add 1 to i and j indices */
76       for (i=0; i<nz; i++) a->j[i]++;
77       for (i=0; i<n+1; i++) a->i[i]++;
78     }
79     *ia = a->i; *ja = a->j;
80   }
81 
82   PetscFunctionReturn(0);
83 }
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
88 {
89   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
90   PetscInt     i,n = a->mbs,nz = a->i[n];
91   PetscErrorCode ierr;
92 
93   PetscFunctionBegin;
94   if (!ia) PetscFunctionReturn(0);
95 
96   if (!blockcompressed) {
97     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
98   } else if (oshift == 1) { /* blockcompressed */
99     for (i=0; i<nz; i++) a->j[i]--;
100     for (i=0; i<n+1; i++) a->i[i]--;
101   }
102 
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
109 {
110   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
111   PetscErrorCode ierr;
112 
113   PetscFunctionBegin;
114 #if defined(PETSC_USE_LOG)
115   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
116 #endif
117   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
118   if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);}
119   if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);}
120   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
121   ierr = PetscFree(a->diag);CHKERRQ(ierr);
122   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
123   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
124   ierr = PetscFree(a->relax_work);CHKERRQ(ierr);
125   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
126   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
127   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
128   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
129 
130   ierr = PetscFree(a->inew);CHKERRQ(ierr);
131   ierr = PetscFree(a);CHKERRQ(ierr);
132 
133   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
134   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
135   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
136   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
137   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
138   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
139   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
146 {
147   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   switch (op) {
152   case MAT_ROW_ORIENTED:
153     a->roworiented = PETSC_TRUE;
154     break;
155   case MAT_COLUMN_ORIENTED:
156     a->roworiented = PETSC_FALSE;
157     break;
158   case MAT_COLUMNS_SORTED:
159     a->sorted = PETSC_TRUE;
160     break;
161   case MAT_COLUMNS_UNSORTED:
162     a->sorted = PETSC_FALSE;
163     break;
164   case MAT_KEEP_ZEROED_ROWS:
165     a->keepzeroedrows = PETSC_TRUE;
166     break;
167   case MAT_NO_NEW_NONZERO_LOCATIONS:
168     a->nonew = 1;
169     break;
170   case MAT_NEW_NONZERO_LOCATION_ERR:
171     a->nonew = -1;
172     break;
173   case MAT_NEW_NONZERO_ALLOCATION_ERR:
174     a->nonew = -2;
175     break;
176   case MAT_YES_NEW_NONZERO_LOCATIONS:
177     a->nonew = 0;
178     break;
179   case MAT_ROWS_SORTED:
180   case MAT_ROWS_UNSORTED:
181   case MAT_YES_NEW_DIAGONALS:
182   case MAT_IGNORE_OFF_PROC_ENTRIES:
183   case MAT_USE_HASH_TABLE:
184     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
185     break;
186   case MAT_NO_NEW_DIAGONALS:
187     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
188   case MAT_NOT_SYMMETRIC:
189   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
190   case MAT_HERMITIAN:
191     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
192   case MAT_SYMMETRIC:
193   case MAT_STRUCTURALLY_SYMMETRIC:
194   case MAT_NOT_HERMITIAN:
195   case MAT_SYMMETRY_ETERNAL:
196   case MAT_NOT_SYMMETRY_ETERNAL:
197     ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr);
198     break;
199   case MAT_IGNORE_LOWER_TRIANGULAR:
200     a->ignore_ltriangular = PETSC_TRUE;
201     break;
202   case MAT_ERROR_LOWER_TRIANGULAR:
203     a->ignore_ltriangular = PETSC_FALSE;
204     break;
205   case MAT_GETROW_UPPERTRIANGULAR:
206     a->getrow_utriangular = PETSC_TRUE;
207     break;
208   default:
209     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
217 {
218   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
219   PetscErrorCode ierr;
220   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
221   MatScalar      *aa,*aa_i;
222   PetscScalar    *v_i;
223 
224   PetscFunctionBegin;
225   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
226   /* Get the upper triangular part of the row */
227   bs  = A->rmap.bs;
228   ai  = a->i;
229   aj  = a->j;
230   aa  = a->a;
231   bs2 = a->bs2;
232 
233   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
234 
235   bn  = row/bs;   /* Block number */
236   bp  = row % bs; /* Block position */
237   M   = ai[bn+1] - ai[bn];
238   *ncols = bs*M;
239 
240   if (v) {
241     *v = 0;
242     if (*ncols) {
243       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
244       for (i=0; i<M; i++) { /* for each block in the block row */
245         v_i  = *v + i*bs;
246         aa_i = aa + bs2*(ai[bn] + i);
247         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
248       }
249     }
250   }
251 
252   if (cols) {
253     *cols = 0;
254     if (*ncols) {
255       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
256       for (i=0; i<M; i++) { /* for each block in the block row */
257         cols_i = *cols + i*bs;
258         itmp  = bs*aj[ai[bn] + i];
259         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
260       }
261     }
262   }
263 
264   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
265   /* this segment is currently removed, so only entries in the upper triangle are obtained */
266 #ifdef column_search
267   v_i    = *v    + M*bs;
268   cols_i = *cols + M*bs;
269   for (i=0; i<bn; i++){ /* for each block row */
270     M = ai[i+1] - ai[i];
271     for (j=0; j<M; j++){
272       itmp = aj[ai[i] + j];    /* block column value */
273       if (itmp == bn){
274         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
275         for (k=0; k<bs; k++) {
276           *cols_i++ = i*bs+k;
277           *v_i++    = aa_i[k];
278         }
279         *ncols += bs;
280         break;
281       }
282     }
283   }
284 #endif
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
291 {
292   PetscErrorCode ierr;
293 
294   PetscFunctionBegin;
295   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
296   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
303 {
304   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
305 
306   PetscFunctionBegin;
307   a->getrow_utriangular = PETSC_TRUE;
308   PetscFunctionReturn(0);
309 }
310 #undef __FUNCT__
311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
313 {
314   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
315 
316   PetscFunctionBegin;
317   a->getrow_utriangular = PETSC_FALSE;
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
324 {
325   PetscErrorCode ierr;
326   PetscFunctionBegin;
327   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
334 {
335   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
336   PetscErrorCode    ierr;
337   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
338   const char        *name;
339   PetscViewerFormat format;
340 
341   PetscFunctionBegin;
342   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
343   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
344   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
345     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
346   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
347     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
348   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
349     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
350     for (i=0; i<a->mbs; i++) {
351       for (j=0; j<bs; j++) {
352         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
353         for (k=a->i[i]; k<a->i[i+1]; k++) {
354           for (l=0; l<bs; l++) {
355 #if defined(PETSC_USE_COMPLEX)
356             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
357               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
358                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
359             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
360               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
361                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
362             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
363               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
364             }
365 #else
366             if (a->a[bs2*k + l*bs + j] != 0.0) {
367               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
368             }
369 #endif
370           }
371         }
372         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
373       }
374     }
375     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
376   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
377      PetscFunctionReturn(0);
378   } else {
379     if (A->factor && bs>1){
380       ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr);
381     }
382     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
383     for (i=0; i<a->mbs; i++) {
384       for (j=0; j<bs; j++) {
385         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
386         for (k=a->i[i]; k<a->i[i+1]; k++) {
387           for (l=0; l<bs; l++) {
388 #if defined(PETSC_USE_COMPLEX)
389             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
390               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
391                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
392             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
393               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
394                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
395             } else {
396               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
397             }
398 #else
399             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
400 #endif
401           }
402         }
403         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
404       }
405     }
406     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
407   }
408   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
414 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
415 {
416   Mat            A = (Mat) Aa;
417   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
418   PetscErrorCode ierr;
419   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
420   PetscMPIInt    rank;
421   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
422   MatScalar      *aa;
423   MPI_Comm       comm;
424   PetscViewer    viewer;
425 
426   PetscFunctionBegin;
427   /*
428     This is nasty. If this is called from an originally parallel matrix
429     then all processes call this,but only the first has the matrix so the
430     rest should return immediately.
431   */
432   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
434   if (rank) PetscFunctionReturn(0);
435 
436   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
437 
438   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
439   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
440 
441   /* loop over matrix elements drawing boxes */
442   color = PETSC_DRAW_BLUE;
443   for (i=0,row=0; i<mbs; i++,row+=bs) {
444     for (j=a->i[i]; j<a->i[i+1]; j++) {
445       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
446       x_l = a->j[j]*bs; x_r = x_l + 1.0;
447       aa = a->a + j*bs2;
448       for (k=0; k<bs; k++) {
449         for (l=0; l<bs; l++) {
450           if (PetscRealPart(*aa++) >=  0.) continue;
451           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456   color = PETSC_DRAW_CYAN;
457   for (i=0,row=0; i<mbs; i++,row+=bs) {
458     for (j=a->i[i]; j<a->i[i+1]; j++) {
459       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
460       x_l = a->j[j]*bs; x_r = x_l + 1.0;
461       aa = a->a + j*bs2;
462       for (k=0; k<bs; k++) {
463         for (l=0; l<bs; l++) {
464           if (PetscRealPart(*aa++) != 0.) continue;
465           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
466         }
467       }
468     }
469   }
470 
471   color = PETSC_DRAW_RED;
472   for (i=0,row=0; i<mbs; i++,row+=bs) {
473     for (j=a->i[i]; j<a->i[i+1]; j++) {
474       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
475       x_l = a->j[j]*bs; x_r = x_l + 1.0;
476       aa = a->a + j*bs2;
477       for (k=0; k<bs; k++) {
478         for (l=0; l<bs; l++) {
479           if (PetscRealPart(*aa++) <= 0.) continue;
480           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
490 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
491 {
492   PetscErrorCode ierr;
493   PetscReal      xl,yl,xr,yr,w,h;
494   PetscDraw      draw;
495   PetscTruth     isnull;
496 
497   PetscFunctionBegin;
498   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
499   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
500 
501   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
502   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
503   xr += w;    yr += h;  xl = -w;     yl = -h;
504   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
505   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
506   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatView_SeqSBAIJ"
512 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
513 {
514   PetscErrorCode ierr;
515   PetscTruth     iascii,isdraw;
516 
517   PetscFunctionBegin;
518   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
519   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
520   if (iascii){
521     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
522   } else if (isdraw) {
523     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
524   } else {
525     Mat B;
526     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
527     ierr = MatView(B,viewer);CHKERRQ(ierr);
528     ierr = MatDestroy(B);CHKERRQ(ierr);
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
536 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
537 {
538   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
539   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
540   PetscInt     *ai = a->i,*ailen = a->ilen;
541   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
542   MatScalar    *ap,*aa = a->a,zero = 0.0;
543 
544   PetscFunctionBegin;
545   for (k=0; k<m; k++) { /* loop over rows */
546     row  = im[k]; brow = row/bs;
547     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
548     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
549     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
550     nrow = ailen[brow];
551     for (l=0; l<n; l++) { /* loop over columns */
552       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
553       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
554       col  = in[l] ;
555       bcol = col/bs;
556       cidx = col%bs;
557       ridx = row%bs;
558       high = nrow;
559       low  = 0; /* assume unsorted */
560       while (high-low > 5) {
561         t = (low+high)/2;
562         if (rp[t] > bcol) high = t;
563         else             low  = t;
564       }
565       for (i=low; i<high; i++) {
566         if (rp[i] > bcol) break;
567         if (rp[i] == bcol) {
568           *v++ = ap[bs2*i+bs*cidx+ridx];
569           goto finished;
570         }
571       }
572       *v++ = zero;
573        finished:;
574     }
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
582 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
583 {
584   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
585   PetscErrorCode  ierr;
586   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
587   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
588   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
589   PetscTruth      roworiented=a->roworiented;
590   const MatScalar *value = v;
591   MatScalar       *ap,*aa = a->a,*bap;
592 
593   PetscFunctionBegin;
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (k=0; k<m; k++) { /* loop over added rows */
600     row  = im[k];
601     if (row < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
604 #endif
605     rp   = aj + ai[row];
606     ap   = aa + bs2*ai[row];
607     rmax = imax[row];
608     nrow = ailen[row];
609     low  = 0;
610     high = nrow;
611     for (l=0; l<n; l++) { /* loop over added columns */
612       if (in[l] < 0) continue;
613       col = in[l];
614 #if defined(PETSC_USE_DEBUG)
615       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
616 #endif
617       if (col < row) continue; /* ignore lower triangular block */
618       if (roworiented) {
619         value = v + k*(stepval+bs)*bs + l*bs;
620       } else {
621         value = v + l*(stepval+bs)*bs + k*bs;
622       }
623       if (col <= lastcol) low = 0; else high = nrow;
624       lastcol = col;
625       while (high-low > 7) {
626         t = (low+high)/2;
627         if (rp[t] > col) high = t;
628         else             low  = t;
629       }
630       for (i=low; i<high; i++) {
631         if (rp[i] > col) break;
632         if (rp[i] == col) {
633           bap  = ap +  bs2*i;
634           if (roworiented) {
635             if (is == ADD_VALUES) {
636               for (ii=0; ii<bs; ii++,value+=stepval) {
637                 for (jj=ii; jj<bs2; jj+=bs) {
638                   bap[jj] += *value++;
639                 }
640               }
641             } else {
642               for (ii=0; ii<bs; ii++,value+=stepval) {
643                 for (jj=ii; jj<bs2; jj+=bs) {
644                   bap[jj] = *value++;
645                 }
646                }
647             }
648           } else {
649             if (is == ADD_VALUES) {
650               for (ii=0; ii<bs; ii++,value+=stepval) {
651                 for (jj=0; jj<bs; jj++) {
652                   *bap++ += *value++;
653                 }
654               }
655             } else {
656               for (ii=0; ii<bs; ii++,value+=stepval) {
657                 for (jj=0; jj<bs; jj++) {
658                   *bap++  = *value++;
659                 }
660               }
661             }
662           }
663           goto noinsert2;
664         }
665       }
666       if (nonew == 1) goto noinsert2;
667       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
668       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
669       N = nrow++ - 1; high++;
670       /* shift up all the later entries in this row */
671       for (ii=N; ii>=i; ii--) {
672         rp[ii+1] = rp[ii];
673         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
674       }
675       if (N >= i) {
676         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
677       }
678       rp[i] = col;
679       bap   = ap +  bs2*i;
680       if (roworiented) {
681         for (ii=0; ii<bs; ii++,value+=stepval) {
682           for (jj=ii; jj<bs2; jj+=bs) {
683             bap[jj] = *value++;
684           }
685         }
686       } else {
687         for (ii=0; ii<bs; ii++,value+=stepval) {
688           for (jj=0; jj<bs; jj++) {
689             *bap++  = *value++;
690           }
691         }
692        }
693     noinsert2:;
694       low = i;
695     }
696     ailen[row] = nrow;
697   }
698    PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
703 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
704 {
705   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
706   PetscErrorCode ierr;
707   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
708   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
709   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
710   MatScalar      *aa = a->a,*ap;
711 
712   PetscFunctionBegin;
713   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
714 
715   if (m) rmax = ailen[0];
716   for (i=1; i<mbs; i++) {
717     /* move each row back by the amount of empty slots (fshift) before it*/
718     fshift += imax[i-1] - ailen[i-1];
719      rmax   = PetscMax(rmax,ailen[i]);
720      if (fshift) {
721        ip = aj + ai[i]; ap = aa + bs2*ai[i];
722        N = ailen[i];
723        for (j=0; j<N; j++) {
724          ip[j-fshift] = ip[j];
725          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726        }
727      }
728      ai[i] = ai[i-1] + ailen[i-1];
729   }
730   if (mbs) {
731     fshift += imax[mbs-1] - ailen[mbs-1];
732      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
733   }
734   /* reset ilen and imax for each row */
735   for (i=0; i<mbs; i++) {
736     ailen[i] = imax[i] = ai[i+1] - ai[i];
737   }
738   a->nz = ai[mbs];
739 
740   /* diagonals may have moved, reset it */
741   if (a->diag) {
742     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
743   }
744   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
745   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
746   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
747   a->reallocs          = 0;
748   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
749   PetscFunctionReturn(0);
750 }
751 
752 /*
753    This function returns an array of flags which indicate the locations of contiguous
754    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
755    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
756    Assume: sizes should be long enough to hold all the values.
757 */
758 #undef __FUNCT__
759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
761 {
762   PetscInt   i,j,k,row;
763   PetscTruth flg;
764 
765   PetscFunctionBegin;
766    for (i=0,j=0; i<n; j++) {
767      row = idx[i];
768      if (row%bs!=0) { /* Not the begining of a block */
769        sizes[j] = 1;
770        i++;
771      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
772        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
773        i++;
774      } else { /* Begining of the block, so check if the complete block exists */
775        flg = PETSC_TRUE;
776        for (k=1; k<bs; k++) {
777          if (row+k != idx[i+k]) { /* break in the block */
778            flg = PETSC_FALSE;
779            break;
780          }
781        }
782        if (flg) { /* No break in the bs */
783          sizes[j] = bs;
784          i+= bs;
785        } else {
786          sizes[j] = 1;
787          i++;
788        }
789      }
790    }
791    *bs_max = j;
792    PetscFunctionReturn(0);
793 }
794 
795 
796 /* Only add/insert a(i,j) with i<=j (blocks).
797    Any a(i,j) with i>j input by user is ingored.
798 */
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
802 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
803 {
804   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
805   PetscErrorCode ierr;
806   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
807   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
808   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
809   PetscInt       ridx,cidx,bs2=a->bs2;
810   MatScalar      *ap,value,*aa=a->a,*bap;
811 
812   PetscFunctionBegin;
813   for (k=0; k<m; k++) { /* loop over added rows */
814     row  = im[k];       /* row number */
815     brow = row/bs;      /* block row number */
816     if (row < 0) continue;
817 #if defined(PETSC_USE_DEBUG)
818     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
819 #endif
820     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
821     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
822     rmax = imax[brow];  /* maximum space allocated for this row */
823     nrow = ailen[brow]; /* actual length of this row */
824     low  = 0;
825 
826     for (l=0; l<n; l++) { /* loop over added columns */
827       if (in[l] < 0) continue;
828 #if defined(PETSC_USE_DEBUG)
829       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
830 #endif
831       col = in[l];
832       bcol = col/bs;              /* block col number */
833 
834       if (brow > bcol) {
835         if (a->ignore_ltriangular){
836           continue; /* ignore lower triangular values */
837         } else {
838           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
839         }
840       }
841 
842       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
843       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
844         /* element value a(k,l) */
845         if (roworiented) {
846           value = v[l + k*n];
847         } else {
848           value = v[k + l*m];
849         }
850 
851         /* move pointer bap to a(k,l) quickly and add/insert value */
852         if (col <= lastcol) low = 0; high = nrow;
853         lastcol = col;
854         while (high-low > 7) {
855           t = (low+high)/2;
856           if (rp[t] > bcol) high = t;
857           else              low  = t;
858         }
859         for (i=low; i<high; i++) {
860           if (rp[i] > bcol) break;
861           if (rp[i] == bcol) {
862             bap  = ap +  bs2*i + bs*cidx + ridx;
863             if (is == ADD_VALUES) *bap += value;
864             else                  *bap  = value;
865             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
866             if (brow == bcol && ridx < cidx){
867               bap  = ap +  bs2*i + bs*ridx + cidx;
868               if (is == ADD_VALUES) *bap += value;
869               else                  *bap  = value;
870             }
871             goto noinsert1;
872           }
873         }
874 
875         if (nonew == 1) goto noinsert1;
876         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
877         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
878 
879         N = nrow++ - 1; high++;
880         /* shift up all the later entries in this row */
881         for (ii=N; ii>=i; ii--) {
882           rp[ii+1] = rp[ii];
883           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
884         }
885         if (N>=i) {
886           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
887         }
888         rp[i]                      = bcol;
889         ap[bs2*i + bs*cidx + ridx] = value;
890       noinsert1:;
891         low = i;
892       }
893     }   /* end of loop over added columns */
894     ailen[brow] = nrow;
895   }   /* end of loop over added rows */
896   PetscFunctionReturn(0);
897 }
898 
899 #undef __FUNCT__
900 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
901 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
902 {
903   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
904   Mat            outA;
905   PetscErrorCode ierr;
906   PetscTruth     row_identity;
907 
908   PetscFunctionBegin;
909   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
910   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
911   if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
912   if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
913 
914   outA        = inA;
915   inA->factor = FACTOR_CHOLESKY;
916 
917   ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
918   /*
919     Blocksize < 8 have a special faster factorization/solver
920     for ICC(0) factorization with natural ordering
921   */
922   switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */
923   case 1:
924     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
925     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
926     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
927     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
928     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
929     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
930     ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
931     break;
932   case 2:
933     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
934     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
935     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
936     inA->ops->forwardsolve     = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering;
937     inA->ops->backwardsolve    = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering;
938     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
939     break;
940   case 3:
941      inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
942      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
943      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
944      inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering;
945      inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering;
946      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
947      break;
948   case 4:
949     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
950     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
951     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
952     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering;
953     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering;
954     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
955     break;
956   case 5:
957     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
958     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
959     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
960     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering;
961     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering;
962     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
963     break;
964   case 6:
965     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
966     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
967     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
968     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering;
969     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering;
970     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
971     break;
972   case 7:
973     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
974     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
975     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
976     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering;
977     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering;
978     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
979     break;
980   default:
981     inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
982     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_N_NaturalOrdering;
983     inA->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
984     inA->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering;
985     inA->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering;
986     break;
987   }
988 
989   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
990   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
991   a->row = row;
992   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
993   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
994   a->col = row;
995 
996   /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
997   if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);}
998   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
999 
1000   if (!a->solve_work) {
1001     ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1002     ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1003   }
1004 
1005   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 EXTERN_C_BEGIN
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1012 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1013 {
1014   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1015   PetscInt     i,nz,n;
1016 
1017   PetscFunctionBegin;
1018   nz = baij->maxnz;
1019   n  = mat->cmap.n;
1020   for (i=0; i<nz; i++) {
1021     baij->j[i] = indices[i];
1022   }
1023    baij->nz = nz;
1024    for (i=0; i<n; i++) {
1025      baij->ilen[i] = baij->imax[i];
1026    }
1027    PetscFunctionReturn(0);
1028 }
1029 EXTERN_C_END
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1033 /*@
1034   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1035   in the matrix.
1036 
1037   Input Parameters:
1038   +  mat     - the SeqSBAIJ matrix
1039   -  indices - the column indices
1040 
1041   Level: advanced
1042 
1043   Notes:
1044   This can be called if you have precomputed the nonzero structure of the
1045   matrix and want to provide it to the matrix object to improve the performance
1046   of the MatSetValues() operation.
1047 
1048   You MUST have set the correct numbers of nonzeros per row in the call to
1049   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1050 
1051   MUST be called before any calls to MatSetValues()
1052 
1053   .seealso: MatCreateSeqSBAIJ
1054 @*/
1055 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1056 {
1057   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1058 
1059   PetscFunctionBegin;
1060   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1061   PetscValidPointer(indices,2);
1062   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1063   if (f) {
1064     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1073 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1074 {
1075   PetscErrorCode ierr;
1076 
1077   PetscFunctionBegin;
1078   /* If the two matrices have the same copy implementation, use fast copy. */
1079   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1080     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1081     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1082 
1083     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1084       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1085     }
1086     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1087   } else {
1088     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1089     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1090     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1097 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1098 {
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1108 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1109 {
1110   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1111   PetscFunctionBegin;
1112   *array = a->a;
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1118 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1119 {
1120   PetscFunctionBegin;
1121   PetscFunctionReturn(0);
1122  }
1123 
1124 #include "petscblaslapack.h"
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1127 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1128 {
1129   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1130   PetscErrorCode ierr;
1131   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1132   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1133 
1134   PetscFunctionBegin;
1135   if (str == SAME_NONZERO_PATTERN) {
1136     PetscScalar alpha = a;
1137     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1138   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1139     if (y->xtoy && y->XtoY != X) {
1140       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1141       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1142     }
1143     if (!y->xtoy) { /* get xtoy */
1144       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1145       y->XtoY = X;
1146     }
1147     bs2 = bs*bs;
1148     for (i=0; i<x->nz; i++) {
1149       j = 0;
1150       while (j < bs2){
1151         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1152         j++;
1153       }
1154     }
1155     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1156   } else {
1157     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1158     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1159     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1166 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1167 {
1168   PetscFunctionBegin;
1169   *flg = PETSC_TRUE;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1175 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1176 {
1177    PetscFunctionBegin;
1178    *flg = PETSC_TRUE;
1179    PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1184 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1185  {
1186    PetscFunctionBegin;
1187    *flg = PETSC_FALSE;
1188    PetscFunctionReturn(0);
1189  }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1193 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1194 {
1195   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1196   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1197   PetscScalar    *aa = a->a;
1198 
1199   PetscFunctionBegin;
1200   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1206 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1207 {
1208   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1209   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1210   PetscScalar    *aa = a->a;
1211 
1212   PetscFunctionBegin;
1213   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /* -------------------------------------------------------------------*/
1218 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1219        MatGetRow_SeqSBAIJ,
1220        MatRestoreRow_SeqSBAIJ,
1221        MatMult_SeqSBAIJ_N,
1222 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1223        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1224        MatMultAdd_SeqSBAIJ_N,
1225        MatSolve_SeqSBAIJ_N,
1226        0,
1227        0,
1228 /*10*/ 0,
1229        0,
1230        MatCholeskyFactor_SeqSBAIJ,
1231        MatRelax_SeqSBAIJ,
1232        MatTranspose_SeqSBAIJ,
1233 /*15*/ MatGetInfo_SeqSBAIJ,
1234        MatEqual_SeqSBAIJ,
1235        MatGetDiagonal_SeqSBAIJ,
1236        MatDiagonalScale_SeqSBAIJ,
1237        MatNorm_SeqSBAIJ,
1238 /*20*/ 0,
1239        MatAssemblyEnd_SeqSBAIJ,
1240        0,
1241        MatSetOption_SeqSBAIJ,
1242        MatZeroEntries_SeqSBAIJ,
1243 /*25*/ 0,
1244        0,
1245        0,
1246        MatCholeskyFactorSymbolic_SeqSBAIJ,
1247        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1248 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1249        0,
1250        MatICCFactorSymbolic_SeqSBAIJ,
1251        MatGetArray_SeqSBAIJ,
1252        MatRestoreArray_SeqSBAIJ,
1253 /*35*/ MatDuplicate_SeqSBAIJ,
1254        MatForwardSolve_SeqSBAIJ_N,
1255        MatBackwardSolve_SeqSBAIJ_N,
1256        0,
1257        MatICCFactor_SeqSBAIJ,
1258 /*40*/ MatAXPY_SeqSBAIJ,
1259        MatGetSubMatrices_SeqSBAIJ,
1260        MatIncreaseOverlap_SeqSBAIJ,
1261        MatGetValues_SeqSBAIJ,
1262        MatCopy_SeqSBAIJ,
1263 /*45*/ 0,
1264        MatScale_SeqSBAIJ,
1265        0,
1266        0,
1267        0,
1268 /*50*/ 0,
1269        MatGetRowIJ_SeqSBAIJ,
1270        MatRestoreRowIJ_SeqSBAIJ,
1271        0,
1272        0,
1273 /*55*/ 0,
1274        0,
1275        0,
1276        0,
1277        MatSetValuesBlocked_SeqSBAIJ,
1278 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1279        0,
1280        0,
1281        0,
1282        0,
1283 /*65*/ 0,
1284        0,
1285        0,
1286        0,
1287        0,
1288 /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1289        0,
1290        0,
1291        0,
1292        0,
1293 /*75*/ 0,
1294        0,
1295        0,
1296        0,
1297        0,
1298 /*80*/ 0,
1299        0,
1300        0,
1301 #if !defined(PETSC_USE_COMPLEX)
1302        MatGetInertia_SeqSBAIJ,
1303 #else
1304        0,
1305 #endif
1306        MatLoad_SeqSBAIJ,
1307 /*85*/ MatIsSymmetric_SeqSBAIJ,
1308        MatIsHermitian_SeqSBAIJ,
1309        MatIsStructurallySymmetric_SeqSBAIJ,
1310        0,
1311        0,
1312 /*90*/ 0,
1313        0,
1314        0,
1315        0,
1316        0,
1317 /*95*/ 0,
1318        0,
1319        0,
1320        0,
1321        0,
1322 /*100*/0,
1323        0,
1324        0,
1325        0,
1326        0,
1327 /*105*/0,
1328        MatRealPart_SeqSBAIJ,
1329        MatImaginaryPart_SeqSBAIJ,
1330        MatGetRowUpperTriangular_SeqSBAIJ,
1331        MatRestoreRowUpperTriangular_SeqSBAIJ
1332 };
1333 
1334 EXTERN_C_BEGIN
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1337 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1338 {
1339   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1340   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   if (aij->nonew != 1) {
1345     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1346   }
1347 
1348   /* allocate space for values if not already there */
1349   if (!aij->saved_values) {
1350     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1351   }
1352 
1353   /* copy values over */
1354   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 EXTERN_C_END
1358 
1359 EXTERN_C_BEGIN
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1362 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1363 {
1364   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1365   PetscErrorCode ierr;
1366   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1367 
1368   PetscFunctionBegin;
1369   if (aij->nonew != 1) {
1370     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1371   }
1372   if (!aij->saved_values) {
1373     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1374   }
1375 
1376   /* copy values over */
1377   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 EXTERN_C_END
1381 
1382 EXTERN_C_BEGIN
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1385 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1386 {
1387   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1388   PetscErrorCode ierr;
1389   PetscInt       i,mbs,bs2;
1390   PetscTruth     skipallocation = PETSC_FALSE,flg;
1391 
1392   PetscFunctionBegin;
1393   B->preallocated = PETSC_TRUE;
1394   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1395   B->rmap.bs = B->cmap.bs = bs;
1396   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1397   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1398 
1399   mbs  = B->rmap.N/bs;
1400   bs2  = bs*bs;
1401 
1402   if (mbs*bs != B->rmap.N) {
1403     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1404   }
1405 
1406   if (nz == MAT_SKIP_ALLOCATION) {
1407     skipallocation = PETSC_TRUE;
1408     nz             = 0;
1409   }
1410 
1411   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1412   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1413   if (nnz) {
1414     for (i=0; i<mbs; i++) {
1415       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1416       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1417     }
1418   }
1419 
1420   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1421   if (!flg) {
1422     switch (bs) {
1423     case 1:
1424       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1425       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1426       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1427       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1428       B->ops->mult             = MatMult_SeqSBAIJ_1;
1429       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1430       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1431       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1432       break;
1433     case 2:
1434       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1435       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1436       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1437       B->ops->mult             = MatMult_SeqSBAIJ_2;
1438       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1439       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1440       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1441       break;
1442     case 3:
1443       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1444       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1445       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1446       B->ops->mult             = MatMult_SeqSBAIJ_3;
1447       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1448       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1449       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1450       break;
1451     case 4:
1452       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1453       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1454       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1455       B->ops->mult             = MatMult_SeqSBAIJ_4;
1456       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1457       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1458       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1459       break;
1460     case 5:
1461       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1462       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1463       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1464       B->ops->mult             = MatMult_SeqSBAIJ_5;
1465       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1466       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1467       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1468       break;
1469     case 6:
1470       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1471       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1472       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1473       B->ops->mult             = MatMult_SeqSBAIJ_6;
1474       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1475       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1476       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1477       break;
1478     case 7:
1479       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1480       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1481       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1482       B->ops->mult             = MatMult_SeqSBAIJ_7;
1483       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1484       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1485       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1486       break;
1487     }
1488   }
1489 
1490   b->mbs = mbs;
1491   b->nbs = mbs;
1492   if (!skipallocation) {
1493     /* b->ilen will count nonzeros in each block row so far. */
1494     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1495     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1496     if (!nnz) {
1497       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1498       else if (nz <= 0)        nz = 1;
1499       for (i=0; i<mbs; i++) {
1500         b->imax[i] = nz;
1501       }
1502       nz = nz*mbs; /* total nz */
1503     } else {
1504       nz = 0;
1505       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1506     }
1507     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1508 
1509     /* allocate the matrix space */
1510     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1511     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1512     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1513     b->singlemalloc = PETSC_TRUE;
1514 
1515     /* pointer to beginning of each row */
1516     b->i[0] = 0;
1517     for (i=1; i<mbs+1; i++) {
1518       b->i[i] = b->i[i-1] + b->imax[i-1];
1519     }
1520     b->free_a     = PETSC_TRUE;
1521     b->free_ij    = PETSC_TRUE;
1522   } else {
1523     b->free_a     = PETSC_FALSE;
1524     b->free_ij    = PETSC_FALSE;
1525   }
1526 
1527   B->rmap.bs               = bs;
1528   b->bs2              = bs2;
1529   b->nz             = 0;
1530   b->maxnz          = nz*bs2;
1531 
1532   b->inew             = 0;
1533   b->jnew             = 0;
1534   b->anew             = 0;
1535   b->a2anew           = 0;
1536   b->permute          = PETSC_FALSE;
1537   PetscFunctionReturn(0);
1538 }
1539 EXTERN_C_END
1540 
1541 EXTERN_C_BEGIN
1542 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1544 EXTERN_C_END
1545 
1546 /*MC
1547   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1548   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1549 
1550   Options Database Keys:
1551   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1552 
1553   Level: beginner
1554 
1555   .seealso: MatCreateSeqSBAIJ
1556 M*/
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1562 {
1563   Mat_SeqSBAIJ   *b;
1564   PetscErrorCode ierr;
1565   PetscMPIInt    size;
1566   PetscTruth     flg;
1567 
1568   PetscFunctionBegin;
1569   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1570   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1571 
1572   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1573   B->data = (void*)b;
1574   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1575   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1576   B->ops->view        = MatView_SeqSBAIJ;
1577   B->factor           = 0;
1578   B->mapping          = 0;
1579   b->row              = 0;
1580   b->icol             = 0;
1581   b->reallocs         = 0;
1582   b->saved_values     = 0;
1583 
1584 
1585   b->sorted           = PETSC_FALSE;
1586   b->roworiented      = PETSC_TRUE;
1587   b->nonew            = 0;
1588   b->diag             = 0;
1589   b->solve_work       = 0;
1590   b->mult_work        = 0;
1591   B->spptr            = 0;
1592   b->keepzeroedrows   = PETSC_FALSE;
1593   b->xtoy             = 0;
1594   b->XtoY             = 0;
1595 
1596   b->inew             = 0;
1597   b->jnew             = 0;
1598   b->anew             = 0;
1599   b->a2anew           = 0;
1600   b->permute          = PETSC_FALSE;
1601 
1602   b->ignore_ltriangular = PETSC_FALSE;
1603   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1604   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1605 
1606   b->getrow_utriangular = PETSC_FALSE;
1607   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1608   if (flg) b->getrow_utriangular = PETSC_TRUE;
1609 
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1611                                      "MatStoreValues_SeqSBAIJ",
1612                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1614                                      "MatRetrieveValues_SeqSBAIJ",
1615                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1617                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1618                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1620                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1621                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1623                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1624                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1626                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1627                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1628 
1629   B->symmetric                  = PETSC_TRUE;
1630   B->structurally_symmetric     = PETSC_TRUE;
1631   B->symmetric_set              = PETSC_TRUE;
1632   B->structurally_symmetric_set = PETSC_TRUE;
1633   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr);
1634   PetscFunctionReturn(0);
1635 }
1636 EXTERN_C_END
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1640 /*@C
1641    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1642    compressed row) format.  For good matrix assembly performance the
1643    user should preallocate the matrix storage by setting the parameter nz
1644    (or the array nnz).  By setting these parameters accurately, performance
1645    during matrix assembly can be increased by more than a factor of 50.
1646 
1647    Collective on Mat
1648 
1649    Input Parameters:
1650 +  A - the symmetric matrix
1651 .  bs - size of block
1652 .  nz - number of block nonzeros per block row (same for all rows)
1653 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1654          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1655 
1656    Options Database Keys:
1657 .   -mat_no_unroll - uses code that does not unroll the loops in the
1658                      block calculations (much slower)
1659 .    -mat_block_size - size of the blocks to use
1660 
1661    Level: intermediate
1662 
1663    Notes:
1664    Specify the preallocated storage with either nz or nnz (not both).
1665    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1666    allocation.  For additional details, see the users manual chapter on
1667    matrices.
1668 
1669    If the nnz parameter is given then the nz parameter is ignored
1670 
1671 
1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1673 @*/
1674 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1675 {
1676   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1677 
1678   PetscFunctionBegin;
1679   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1680   if (f) {
1681     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreateSeqSBAIJ"
1688 /*@C
1689    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1690    compressed row) format.  For good matrix assembly performance the
1691    user should preallocate the matrix storage by setting the parameter nz
1692    (or the array nnz).  By setting these parameters accurately, performance
1693    during matrix assembly can be increased by more than a factor of 50.
1694 
1695    Collective on MPI_Comm
1696 
1697    Input Parameters:
1698 +  comm - MPI communicator, set to PETSC_COMM_SELF
1699 .  bs - size of block
1700 .  m - number of rows, or number of columns
1701 .  nz - number of block nonzeros per block row (same for all rows)
1702 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1703          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1704 
1705    Output Parameter:
1706 .  A - the symmetric matrix
1707 
1708    Options Database Keys:
1709 .   -mat_no_unroll - uses code that does not unroll the loops in the
1710                      block calculations (much slower)
1711 .    -mat_block_size - size of the blocks to use
1712 
1713    Level: intermediate
1714 
1715    Notes:
1716 
1717    Specify the preallocated storage with either nz or nnz (not both).
1718    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1719    allocation.  For additional details, see the users manual chapter on
1720    matrices.
1721 
1722    If the nnz parameter is given then the nz parameter is ignored
1723 
1724 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1725 @*/
1726 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1727 {
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1732   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1733   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1734   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1740 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1741 {
1742   Mat            C;
1743   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1744   PetscErrorCode ierr;
1745   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1746 
1747   PetscFunctionBegin;
1748   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1749 
1750   *B = 0;
1751   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1752   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1753   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1754   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1755   c    = (Mat_SeqSBAIJ*)C->data;
1756 
1757   C->preallocated   = PETSC_TRUE;
1758   C->factor         = A->factor;
1759   c->row            = 0;
1760   c->icol           = 0;
1761   c->saved_values   = 0;
1762   c->keepzeroedrows = a->keepzeroedrows;
1763   C->assembled      = PETSC_TRUE;
1764 
1765   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1766   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1767   c->bs2  = a->bs2;
1768   c->mbs  = a->mbs;
1769   c->nbs  = a->nbs;
1770 
1771   ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr);
1772   for (i=0; i<mbs; i++) {
1773     c->imax[i] = a->imax[i];
1774     c->ilen[i] = a->ilen[i];
1775   }
1776 
1777   /* allocate the matrix space */
1778   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1779   c->singlemalloc = PETSC_TRUE;
1780   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1781   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1782   if (mbs > 0) {
1783     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1784     if (cpvalues == MAT_COPY_VALUES) {
1785       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     } else {
1787       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     }
1789   }
1790 
1791   c->sorted      = a->sorted;
1792   c->roworiented = a->roworiented;
1793   c->nonew       = a->nonew;
1794 
1795   if (a->diag) {
1796     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1797     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1798     for (i=0; i<mbs; i++) {
1799       c->diag[i] = a->diag[i];
1800     }
1801   } else c->diag  = 0;
1802   c->nz           = a->nz;
1803   c->maxnz        = a->maxnz;
1804   c->solve_work   = 0;
1805   c->mult_work    = 0;
1806   c->free_a       = PETSC_TRUE;
1807   c->free_ij      = PETSC_TRUE;
1808   *B = C;
1809   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1815 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1816 {
1817   Mat_SeqSBAIJ   *a;
1818   Mat            B;
1819   PetscErrorCode ierr;
1820   int            fd;
1821   PetscMPIInt    size;
1822   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1823   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1824   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1825   PetscInt       *masked,nmask,tmp,bs2,ishift;
1826   PetscScalar    *aa;
1827   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1831   bs2  = bs*bs;
1832 
1833   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1834   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1835   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1836   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1837   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1838   M = header[1]; N = header[2]; nz = header[3];
1839 
1840   if (header[3] < 0) {
1841     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1842   }
1843 
1844   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1845 
1846   /*
1847      This code adds extra rows to make sure the number of rows is
1848     divisible by the blocksize
1849   */
1850   mbs        = M/bs;
1851   extra_rows = bs - M + bs*(mbs);
1852   if (extra_rows == bs) extra_rows = 0;
1853   else                  mbs++;
1854   if (extra_rows) {
1855     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1856   }
1857 
1858   /* read in row lengths */
1859   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1860   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1861   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1862 
1863   /* read in column indices */
1864   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1865   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1866   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1867 
1868   /* loop over row lengths determining block row lengths */
1869   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1870   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1871   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1872   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1873   masked   = mask + mbs;
1874   rowcount = 0; nzcount = 0;
1875   for (i=0; i<mbs; i++) {
1876     nmask = 0;
1877     for (j=0; j<bs; j++) {
1878       kmax = rowlengths[rowcount];
1879       for (k=0; k<kmax; k++) {
1880         tmp = jj[nzcount++]/bs;   /* block col. index */
1881         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1882       }
1883       rowcount++;
1884     }
1885     s_browlengths[i] += nmask;
1886 
1887     /* zero out the mask elements we set */
1888     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1889   }
1890 
1891   /* create our matrix */
1892   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1893   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1894   ierr = MatSetType(B,type);CHKERRQ(ierr);
1895   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1896   a = (Mat_SeqSBAIJ*)B->data;
1897 
1898   /* set matrix "i" values */
1899   a->i[0] = 0;
1900   for (i=1; i<= mbs; i++) {
1901     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1902     a->ilen[i-1] = s_browlengths[i-1];
1903   }
1904   a->nz = a->i[mbs];
1905 
1906   /* read in nonzero values */
1907   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1908   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1909   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1910 
1911   /* set "a" and "j" values into matrix */
1912   nzcount = 0; jcount = 0;
1913   for (i=0; i<mbs; i++) {
1914     nzcountb = nzcount;
1915     nmask    = 0;
1916     for (j=0; j<bs; j++) {
1917       kmax = rowlengths[i*bs+j];
1918       for (k=0; k<kmax; k++) {
1919         tmp = jj[nzcount++]/bs; /* block col. index */
1920         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1921       }
1922     }
1923     /* sort the masked values */
1924     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1925 
1926     /* set "j" values into matrix */
1927     maskcount = 1;
1928     for (j=0; j<nmask; j++) {
1929       a->j[jcount++]  = masked[j];
1930       mask[masked[j]] = maskcount++;
1931     }
1932 
1933     /* set "a" values into matrix */
1934     ishift = bs2*a->i[i];
1935     for (j=0; j<bs; j++) {
1936       kmax = rowlengths[i*bs+j];
1937       for (k=0; k<kmax; k++) {
1938         tmp       = jj[nzcountb]/bs ; /* block col. index */
1939         if (tmp >= i){
1940           block     = mask[tmp] - 1;
1941           point     = jj[nzcountb] - bs*tmp;
1942           idx       = ishift + bs2*block + j + bs*point;
1943           a->a[idx] = aa[nzcountb];
1944         }
1945         nzcountb++;
1946       }
1947     }
1948     /* zero out the mask elements we set */
1949     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1950   }
1951   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1952 
1953   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1954   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1955   ierr = PetscFree(aa);CHKERRQ(ierr);
1956   ierr = PetscFree(jj);CHKERRQ(ierr);
1957   ierr = PetscFree(mask);CHKERRQ(ierr);
1958 
1959   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   ierr = MatView_Private(B);CHKERRQ(ierr);
1962   *A = B;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1968 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1969 {
1970   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1971   MatScalar      *aa=a->a,*v,*v1;
1972   PetscScalar    *x,*b,*t,sum,d;
1973   PetscErrorCode ierr;
1974   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
1975   PetscInt       nz,nz1,*vj,*vj1,i;
1976 
1977   PetscFunctionBegin;
1978   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
1979 
1980   its = its*lits;
1981   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1982 
1983   if (bs > 1)
1984     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1985 
1986   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1987   if (xx != bb) {
1988     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1989   } else {
1990     b = x;
1991   }
1992 
1993   if (!a->relax_work) {
1994     ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr);
1995   }
1996   t = a->relax_work;
1997 
1998   if (flag & SOR_ZERO_INITIAL_GUESS) {
1999     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2000       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2001 
2002       for (i=0; i<m; i++){
2003         d  = *(aa + ai[i]);  /* diag[i] */
2004         v  = aa + ai[i] + 1;
2005         vj = aj + ai[i] + 1;
2006         nz = ai[i+1] - ai[i] - 1;
2007         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2008         x[i] = omega*t[i]/d;
2009         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2010       }
2011     }
2012 
2013     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2014       if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2015         t = b;
2016       }
2017 
2018       for (i=m-1; i>=0; i--){
2019         d  = *(aa + ai[i]);
2020         v  = aa + ai[i] + 1;
2021         vj = aj + ai[i] + 1;
2022         nz = ai[i+1] - ai[i] - 1;
2023         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2024         sum = t[i];
2025         while (nz--) sum -= x[*vj++]*(*v++);
2026         x[i] =   (1-omega)*x[i] + omega*sum/d;
2027       }
2028       t = a->relax_work;
2029     }
2030     its--;
2031   }
2032 
2033   while (its--) {
2034     /*
2035        forward sweep:
2036        for i=0,...,m-1:
2037          sum[i] = (b[i] - U(i,:)x )/d[i];
2038          x[i]   = (1-omega)x[i] + omega*sum[i];
2039          b      = b - x[i]*U^T(i,:);
2040 
2041     */
2042     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2043       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2044 
2045       for (i=0; i<m; i++){
2046         d  = *(aa + ai[i]);  /* diag[i] */
2047         v  = aa + ai[i] + 1; v1=v;
2048         vj = aj + ai[i] + 1; vj1=vj;
2049         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2050         sum = t[i];
2051         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2052         while (nz1--) sum -= (*v1++)*x[*vj1++];
2053         x[i] = (1-omega)*x[i] + omega*sum/d;
2054         while (nz--) t[*vj++] -= x[i]*(*v++);
2055       }
2056     }
2057 
2058     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2059       /*
2060        backward sweep:
2061        b = b - x[i]*U^T(i,:), i=0,...,n-2
2062        for i=m-1,...,0:
2063          sum[i] = (b[i] - U(i,:)x )/d[i];
2064          x[i]   = (1-omega)x[i] + omega*sum[i];
2065       */
2066       /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2067       ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr);
2068 
2069       for (i=0; i<m-1; i++){  /* update rhs */
2070         v  = aa + ai[i] + 1;
2071         vj = aj + ai[i] + 1;
2072         nz = ai[i+1] - ai[i] - 1;
2073         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2074         while (nz--) t[*vj++] -= x[i]*(*v++);
2075       }
2076       for (i=m-1; i>=0; i--){
2077         d  = *(aa + ai[i]);
2078         v  = aa + ai[i] + 1;
2079         vj = aj + ai[i] + 1;
2080         nz = ai[i+1] - ai[i] - 1;
2081         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2082         sum = t[i];
2083         while (nz--) sum -= x[*vj++]*(*v++);
2084         x[i] =   (1-omega)*x[i] + omega*sum/d;
2085       }
2086     }
2087   }
2088 
2089   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2090   if (bb != xx) {
2091     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays"
2098 /*@
2099      MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2100               (upper triangular entries in CSR format) provided by the user.
2101 
2102      Collective on MPI_Comm
2103 
2104    Input Parameters:
2105 +  comm - must be an MPI communicator of size 1
2106 .  bs - size of block
2107 .  m - number of rows
2108 .  n - number of columns
2109 .  i - row indices
2110 .  j - column indices
2111 -  a - matrix values
2112 
2113    Output Parameter:
2114 .  mat - the matrix
2115 
2116    Level: intermediate
2117 
2118    Notes:
2119        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2120     once the matrix is destroyed
2121 
2122        You cannot set new nonzero locations into this matrix, that will generate an error.
2123 
2124        The i and j indices are 0 based
2125 
2126 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2127 
2128 @*/
2129 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2130 {
2131   PetscErrorCode ierr;
2132   PetscInt       ii;
2133   Mat_SeqSBAIJ   *sbaij;
2134 
2135   PetscFunctionBegin;
2136   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2137   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2138 
2139   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2140   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2141   ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr);
2142   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2143   sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2144   ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr);
2145 
2146   sbaij->i = i;
2147   sbaij->j = j;
2148   sbaij->a = a;
2149   sbaij->singlemalloc = PETSC_FALSE;
2150   sbaij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2151   sbaij->free_a       = PETSC_FALSE;
2152   sbaij->free_ij      = PETSC_FALSE;
2153 
2154   for (ii=0; ii<m; ii++) {
2155     sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2156 #if defined(PETSC_USE_DEBUG)
2157     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2158 #endif
2159   }
2160 #if defined(PETSC_USE_DEBUG)
2161   for (ii=0; ii<sbaij->i[m]; ii++) {
2162     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2163     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2164   }
2165 #endif
2166 
2167   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 
2173 
2174 
2175 
2176