xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 1d2e40055fe3bf783df36eeeccf2d7d5fcfab0fd)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i,mbs = a->mbs;
40 
41   PetscFunctionBegin;
42   if (a->diag) PetscFunctionReturn(0);
43 
44   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
45   ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
46   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
53 {
54   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
55   PetscInt     n = a->mbs,i;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60 
61   if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     PetscInt nz = a->i[n];
64     for (i=0; i<nz; i++) a->j[i]++;
65     for (i=0; i<n+1; i++) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
76 {
77   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
78   PetscInt     i,n = a->mbs;
79 
80   PetscFunctionBegin;
81   if (!ia) PetscFunctionReturn(0);
82 
83   if (oshift == 1) {
84     PetscInt nz = a->i[n]-1;
85     for (i=0; i<nz; i++) a->j[i]--;
86     for (i=0; i<n+1; i++) a->i[i]--;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
94 {
95   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99 #if defined(PETSC_USE_LOG)
100   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz);
101 #endif
102   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
103   if (a->row) {
104     ierr = ISDestroy(a->row);CHKERRQ(ierr);
105   }
106   ierr = PetscFree(a->diag);CHKERRQ(ierr);
107   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
108   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
109   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
110   ierr = PetscFree(a->solves_work);CHKERRQ(ierr);
111   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
112   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
113   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
114 
115   ierr = PetscFree(a->inew);CHKERRQ(ierr);
116   ierr = PetscFree(a);CHKERRQ(ierr);
117 
118   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
119   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
120   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
121   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
122   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
129 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
130 {
131   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
132   PetscErrorCode ierr;
133 
134   PetscFunctionBegin;
135   switch (op) {
136   case MAT_ROW_ORIENTED:
137     a->roworiented = PETSC_TRUE;
138     break;
139   case MAT_COLUMN_ORIENTED:
140     a->roworiented = PETSC_FALSE;
141     break;
142   case MAT_COLUMNS_SORTED:
143     a->sorted = PETSC_TRUE;
144     break;
145   case MAT_COLUMNS_UNSORTED:
146     a->sorted = PETSC_FALSE;
147     break;
148   case MAT_KEEP_ZEROED_ROWS:
149     a->keepzeroedrows = PETSC_TRUE;
150     break;
151   case MAT_NO_NEW_NONZERO_LOCATIONS:
152     a->nonew = 1;
153     break;
154   case MAT_NEW_NONZERO_LOCATION_ERR:
155     a->nonew = -1;
156     break;
157   case MAT_NEW_NONZERO_ALLOCATION_ERR:
158     a->nonew = -2;
159     break;
160   case MAT_YES_NEW_NONZERO_LOCATIONS:
161     a->nonew = 0;
162     break;
163   case MAT_ROWS_SORTED:
164   case MAT_ROWS_UNSORTED:
165   case MAT_YES_NEW_DIAGONALS:
166   case MAT_IGNORE_OFF_PROC_ENTRIES:
167   case MAT_USE_HASH_TABLE:
168     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
169     break;
170   case MAT_NO_NEW_DIAGONALS:
171     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
172   case MAT_NOT_SYMMETRIC:
173   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
174   case MAT_HERMITIAN:
175     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
176   case MAT_SYMMETRIC:
177   case MAT_STRUCTURALLY_SYMMETRIC:
178   case MAT_NOT_HERMITIAN:
179   case MAT_SYMMETRY_ETERNAL:
180   case MAT_NOT_SYMMETRY_ETERNAL:
181   case MAT_IGNORE_LOWER_TRIANGULAR:
182     a->ignore_ltriangular = PETSC_TRUE;
183     break;
184   case MAT_ERROR_LOWER_TRIANGULAR:
185     a->ignore_ltriangular = PETSC_FALSE;
186     break;
187   case MAT_GETROW_UPPERTRIANGULAR:
188     a->getrow_utriangular = PETSC_TRUE;
189     break;
190   default:
191     SETERRQ(PETSC_ERR_SUP,"unknown option");
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
198 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
199 {
200   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
201   PetscErrorCode ierr;
202   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
203   MatScalar      *aa,*aa_i;
204   PetscScalar    *v_i;
205 
206   PetscFunctionBegin;
207   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
208   /* Get the upper triangular part of the row */
209   bs  = A->rmap.bs;
210   ai  = a->i;
211   aj  = a->j;
212   aa  = a->a;
213   bs2 = a->bs2;
214 
215   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
216 
217   bn  = row/bs;   /* Block number */
218   bp  = row % bs; /* Block position */
219   M   = ai[bn+1] - ai[bn];
220   *ncols = bs*M;
221 
222   if (v) {
223     *v = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         v_i  = *v + i*bs;
228         aa_i = aa + bs2*(ai[bn] + i);
229         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
230       }
231     }
232   }
233 
234   if (cols) {
235     *cols = 0;
236     if (*ncols) {
237       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
238       for (i=0; i<M; i++) { /* for each block in the block row */
239         cols_i = *cols + i*bs;
240         itmp  = bs*aj[ai[bn] + i];
241         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
242       }
243     }
244   }
245 
246   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
247   /* this segment is currently removed, so only entries in the upper triangle are obtained */
248 #ifdef column_search
249   v_i    = *v    + M*bs;
250   cols_i = *cols + M*bs;
251   for (i=0; i<bn; i++){ /* for each block row */
252     M = ai[i+1] - ai[i];
253     for (j=0; j<M; j++){
254       itmp = aj[ai[i] + j];    /* block column value */
255       if (itmp == bn){
256         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
257         for (k=0; k<bs; k++) {
258           *cols_i++ = i*bs+k;
259           *v_i++    = aa_i[k];
260         }
261         *ncols += bs;
262         break;
263       }
264     }
265   }
266 #endif
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
273 {
274   PetscErrorCode ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
278   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
284 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
285 {
286   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
287 
288   PetscFunctionBegin;
289   a->getrow_utriangular = PETSC_TRUE;
290   PetscFunctionReturn(0);
291 }
292 #undef __FUNCT__
293 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
294 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
295 {
296   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
297 
298   PetscFunctionBegin;
299   a->getrow_utriangular = PETSC_FALSE;
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
305 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
306 {
307   PetscErrorCode ierr;
308   PetscFunctionBegin;
309   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
315 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
316 {
317   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
318   PetscErrorCode    ierr;
319   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
320   const char        *name;
321   PetscViewerFormat format;
322 
323   PetscFunctionBegin;
324   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
325   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
326   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
327     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
328   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
329     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
330   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
331     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
332     for (i=0; i<a->mbs; i++) {
333       for (j=0; j<bs; j++) {
334         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
335         for (k=a->i[i]; k<a->i[i+1]; k++) {
336           for (l=0; l<bs; l++) {
337 #if defined(PETSC_USE_COMPLEX)
338             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
339               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
340                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
341             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
342               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
343                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
344             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
346             }
347 #else
348             if (a->a[bs2*k + l*bs + j] != 0.0) {
349               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
350             }
351 #endif
352           }
353         }
354         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
355       }
356     }
357     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
358   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
359      PetscFunctionReturn(0);
360   } else {
361     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
362     for (i=0; i<a->mbs; i++) {
363       for (j=0; j<bs; j++) {
364         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
365         for (k=a->i[i]; k<a->i[i+1]; k++) {
366           for (l=0; l<bs; l++) {
367 #if defined(PETSC_USE_COMPLEX)
368             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
369               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
370                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
371             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
373                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else {
375               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             }
377 #else
378             ierr = PetscViewerASCIIPrintf(viewer," %D %G ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
379 #endif
380           }
381         }
382         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
383       }
384     }
385     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
386   }
387   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
393 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
394 {
395   Mat            A = (Mat) Aa;
396   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
397   PetscErrorCode ierr;
398   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
399   PetscMPIInt    rank;
400   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
401   MatScalar      *aa;
402   MPI_Comm       comm;
403   PetscViewer    viewer;
404 
405   PetscFunctionBegin;
406   /*
407     This is nasty. If this is called from an originally parallel matrix
408     then all processes call this,but only the first has the matrix so the
409     rest should return immediately.
410   */
411   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
412   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
413   if (rank) PetscFunctionReturn(0);
414 
415   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
416 
417   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
418   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
419 
420   /* loop over matrix elements drawing boxes */
421   color = PETSC_DRAW_BLUE;
422   for (i=0,row=0; i<mbs; i++,row+=bs) {
423     for (j=a->i[i]; j<a->i[i+1]; j++) {
424       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
425       x_l = a->j[j]*bs; x_r = x_l + 1.0;
426       aa = a->a + j*bs2;
427       for (k=0; k<bs; k++) {
428         for (l=0; l<bs; l++) {
429           if (PetscRealPart(*aa++) >=  0.) continue;
430           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
431         }
432       }
433     }
434   }
435   color = PETSC_DRAW_CYAN;
436   for (i=0,row=0; i<mbs; i++,row+=bs) {
437     for (j=a->i[i]; j<a->i[i+1]; j++) {
438       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
439       x_l = a->j[j]*bs; x_r = x_l + 1.0;
440       aa = a->a + j*bs2;
441       for (k=0; k<bs; k++) {
442         for (l=0; l<bs; l++) {
443           if (PetscRealPart(*aa++) != 0.) continue;
444           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
445         }
446       }
447     }
448   }
449 
450   color = PETSC_DRAW_RED;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) <= 0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
469 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
470 {
471   PetscErrorCode ierr;
472   PetscReal      xl,yl,xr,yr,w,h;
473   PetscDraw      draw;
474   PetscTruth     isnull;
475 
476   PetscFunctionBegin;
477 
478   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
479   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
480 
481   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
482   xr  = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
483   xr += w;    yr += h;  xl = -w;     yl = -h;
484   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
485   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
486   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
487   PetscFunctionReturn(0);
488 }
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatView_SeqSBAIJ"
492 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
493 {
494   PetscErrorCode ierr;
495   PetscTruth     iascii,isdraw;
496 
497   PetscFunctionBegin;
498   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
499   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
500   if (iascii){
501     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
502   } else if (isdraw) {
503     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
504   } else {
505     Mat B;
506     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
507     ierr = MatView(B,viewer);CHKERRQ(ierr);
508     ierr = MatDestroy(B);CHKERRQ(ierr);
509   }
510   PetscFunctionReturn(0);
511 }
512 
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
516 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
517 {
518   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
519   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
520   PetscInt     *ai = a->i,*ailen = a->ilen;
521   PetscInt     brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
522   MatScalar    *ap,*aa = a->a,zero = 0.0;
523 
524   PetscFunctionBegin;
525   for (k=0; k<m; k++) { /* loop over rows */
526     row  = im[k]; brow = row/bs;
527     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
528     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
529     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
530     nrow = ailen[brow];
531     for (l=0; l<n; l++) { /* loop over columns */
532       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
533       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
534       col  = in[l] ;
535       bcol = col/bs;
536       cidx = col%bs;
537       ridx = row%bs;
538       high = nrow;
539       low  = 0; /* assume unsorted */
540       while (high-low > 5) {
541         t = (low+high)/2;
542         if (rp[t] > bcol) high = t;
543         else             low  = t;
544       }
545       for (i=low; i<high; i++) {
546         if (rp[i] > bcol) break;
547         if (rp[i] == bcol) {
548           *v++ = ap[bs2*i+bs*cidx+ridx];
549           goto finished;
550         }
551       }
552       *v++ = zero;
553        finished:;
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
562 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
563 {
564   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
565   PetscErrorCode  ierr;
566   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
567   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
568   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
569   PetscTruth      roworiented=a->roworiented;
570   const MatScalar *value = v;
571   MatScalar       *ap,*aa = a->a,*bap;
572 
573   PetscFunctionBegin;
574   if (roworiented) {
575     stepval = (n-1)*bs;
576   } else {
577     stepval = (m-1)*bs;
578   }
579   for (k=0; k<m; k++) { /* loop over added rows */
580     row  = im[k];
581     if (row < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
584 #endif
585     rp   = aj + ai[row];
586     ap   = aa + bs2*ai[row];
587     rmax = imax[row];
588     nrow = ailen[row];
589     low  = 0;
590     high = nrow;
591     for (l=0; l<n; l++) { /* loop over added columns */
592       if (in[l] < 0) continue;
593       col = in[l];
594 #if defined(PETSC_USE_DEBUG)
595       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
596 #endif
597       if (col < row) continue; /* ignore lower triangular block */
598       if (roworiented) {
599         value = v + k*(stepval+bs)*bs + l*bs;
600       } else {
601         value = v + l*(stepval+bs)*bs + k*bs;
602       }
603       if (col <= lastcol) low = 0; else high = nrow;
604       lastcol = col;
605       while (high-low > 7) {
606         t = (low+high)/2;
607         if (rp[t] > col) high = t;
608         else             low  = t;
609       }
610       for (i=low; i<high; i++) {
611         if (rp[i] > col) break;
612         if (rp[i] == col) {
613           bap  = ap +  bs2*i;
614           if (roworiented) {
615             if (is == ADD_VALUES) {
616               for (ii=0; ii<bs; ii++,value+=stepval) {
617                 for (jj=ii; jj<bs2; jj+=bs) {
618                   bap[jj] += *value++;
619                 }
620               }
621             } else {
622               for (ii=0; ii<bs; ii++,value+=stepval) {
623                 for (jj=ii; jj<bs2; jj+=bs) {
624                   bap[jj] = *value++;
625                 }
626                }
627             }
628           } else {
629             if (is == ADD_VALUES) {
630               for (ii=0; ii<bs; ii++,value+=stepval) {
631                 for (jj=0; jj<bs; jj++) {
632                   *bap++ += *value++;
633                 }
634               }
635             } else {
636               for (ii=0; ii<bs; ii++,value+=stepval) {
637                 for (jj=0; jj<bs; jj++) {
638                   *bap++  = *value++;
639                 }
640               }
641             }
642           }
643           goto noinsert2;
644         }
645       }
646       if (nonew == 1) goto noinsert2;
647       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
648       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
649       N = nrow++ - 1; high++;
650       /* shift up all the later entries in this row */
651       for (ii=N; ii>=i; ii--) {
652         rp[ii+1] = rp[ii];
653         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
654       }
655       if (N >= i) {
656         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
657       }
658       rp[i] = col;
659       bap   = ap +  bs2*i;
660       if (roworiented) {
661         for (ii=0; ii<bs; ii++,value+=stepval) {
662           for (jj=ii; jj<bs2; jj+=bs) {
663             bap[jj] = *value++;
664           }
665         }
666       } else {
667         for (ii=0; ii<bs; ii++,value+=stepval) {
668           for (jj=0; jj<bs; jj++) {
669             *bap++  = *value++;
670           }
671         }
672        }
673     noinsert2:;
674       low = i;
675     }
676     ailen[row] = nrow;
677   }
678    PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
683 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
684 {
685   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
686   PetscErrorCode ierr;
687   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
688   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
689   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
690   MatScalar      *aa = a->a,*ap;
691 
692   PetscFunctionBegin;
693   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
694 
695   if (m) rmax = ailen[0];
696   for (i=1; i<mbs; i++) {
697     /* move each row back by the amount of empty slots (fshift) before it*/
698     fshift += imax[i-1] - ailen[i-1];
699      rmax   = PetscMax(rmax,ailen[i]);
700      if (fshift) {
701        ip = aj + ai[i]; ap = aa + bs2*ai[i];
702        N = ailen[i];
703        for (j=0; j<N; j++) {
704          ip[j-fshift] = ip[j];
705          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
706        }
707      }
708      ai[i] = ai[i-1] + ailen[i-1];
709   }
710   if (mbs) {
711     fshift += imax[mbs-1] - ailen[mbs-1];
712      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
713   }
714   /* reset ilen and imax for each row */
715   for (i=0; i<mbs; i++) {
716     ailen[i] = imax[i] = ai[i+1] - ai[i];
717   }
718   a->nz = ai[mbs];
719 
720   /* diagonals may have moved, reset it */
721   if (a->diag) {
722     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
723   }
724   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
725   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
726   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
727   a->reallocs          = 0;
728   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
729   PetscFunctionReturn(0);
730 }
731 
732 /*
733    This function returns an array of flags which indicate the locations of contiguous
734    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
735    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
736    Assume: sizes should be long enough to hold all the values.
737 */
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
740 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
741 {
742   PetscInt   i,j,k,row;
743   PetscTruth flg;
744 
745   PetscFunctionBegin;
746    for (i=0,j=0; i<n; j++) {
747      row = idx[i];
748      if (row%bs!=0) { /* Not the begining of a block */
749        sizes[j] = 1;
750        i++;
751      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
752        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
753        i++;
754      } else { /* Begining of the block, so check if the complete block exists */
755        flg = PETSC_TRUE;
756        for (k=1; k<bs; k++) {
757          if (row+k != idx[i+k]) { /* break in the block */
758            flg = PETSC_FALSE;
759            break;
760          }
761        }
762        if (flg) { /* No break in the bs */
763          sizes[j] = bs;
764          i+= bs;
765        } else {
766          sizes[j] = 1;
767          i++;
768        }
769      }
770    }
771    *bs_max = j;
772    PetscFunctionReturn(0);
773 }
774 
775 
776 /* Only add/insert a(i,j) with i<=j (blocks).
777    Any a(i,j) with i>j input by user is ingored.
778 */
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
782 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
783 {
784   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
785   PetscErrorCode ierr;
786   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
787   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
788   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
789   PetscInt       ridx,cidx,bs2=a->bs2;
790   MatScalar      *ap,value,*aa=a->a,*bap;
791 
792   PetscFunctionBegin;
793   for (k=0; k<m; k++) { /* loop over added rows */
794     row  = im[k];       /* row number */
795     brow = row/bs;      /* block row number */
796     if (row < 0) continue;
797 #if defined(PETSC_USE_DEBUG)
798     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
799 #endif
800     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
801     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
802     rmax = imax[brow];  /* maximum space allocated for this row */
803     nrow = ailen[brow]; /* actual length of this row */
804     low  = 0;
805 
806     for (l=0; l<n; l++) { /* loop over added columns */
807       if (in[l] < 0) continue;
808 #if defined(PETSC_USE_DEBUG)
809       if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1);
810 #endif
811       col = in[l];
812       bcol = col/bs;              /* block col number */
813 
814       if (brow > bcol) {
815         if (a->ignore_ltriangular){
816           continue; /* ignore lower triangular values */
817         } else {
818           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
819         }
820       }
821 
822       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
823       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
824         /* element value a(k,l) */
825         if (roworiented) {
826           value = v[l + k*n];
827         } else {
828           value = v[k + l*m];
829         }
830 
831         /* move pointer bap to a(k,l) quickly and add/insert value */
832         if (col <= lastcol) low = 0; high = nrow;
833         lastcol = col;
834         while (high-low > 7) {
835           t = (low+high)/2;
836           if (rp[t] > bcol) high = t;
837           else              low  = t;
838         }
839         for (i=low; i<high; i++) {
840           if (rp[i] > bcol) break;
841           if (rp[i] == bcol) {
842             bap  = ap +  bs2*i + bs*cidx + ridx;
843             if (is == ADD_VALUES) *bap += value;
844             else                  *bap  = value;
845             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
846             if (brow == bcol && ridx < cidx){
847               bap  = ap +  bs2*i + bs*ridx + cidx;
848               if (is == ADD_VALUES) *bap += value;
849               else                  *bap  = value;
850             }
851             goto noinsert1;
852           }
853         }
854 
855         if (nonew == 1) goto noinsert1;
856         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
857         MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
858 
859         N = nrow++ - 1; high++;
860         /* shift up all the later entries in this row */
861         for (ii=N; ii>=i; ii--) {
862           rp[ii+1] = rp[ii];
863           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
864         }
865         if (N>=i) {
866           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
867         }
868         rp[i]                      = bcol;
869         ap[bs2*i + bs*cidx + ridx] = value;
870       noinsert1:;
871         low = i;
872       }
873     }   /* end of loop over added columns */
874     ailen[brow] = nrow;
875   }   /* end of loop over added rows */
876   PetscFunctionReturn(0);
877 }
878 
879 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
880 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
881 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
882 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
883 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
884 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar);
885 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
886 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
887 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
888 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
889 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
890 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
891 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
892 
893 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
894 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
895 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
896 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
897 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
898 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
899 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
900 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
901 
902 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
903 
904 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
905 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
906 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
907 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
908 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
909 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
910 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
911 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
912 
913 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*);
914 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*);
915 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*);
916 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*);
917 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*);
918 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*);
919 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*);
920 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*);
921 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
922 
923 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
924 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
925 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
926 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
927 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
928 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
929 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
930 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
931 
932 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
933 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
934 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
935 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
936 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
937 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
938 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
939 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
940 
941 #ifdef HAVE_MatICCFactor
942 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
943 #undef __FUNCT__
944 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
945 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
946 {
947   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
948   Mat            outA;
949   PetscErrorCode ierr;
950   PetscTruth     row_identity,col_identity;
951 
952   PetscFunctionBegin;
953   outA          = inA;
954   inA->factor   = FACTOR_CHOLESKY;
955 
956   if (!a->diag) {
957     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
958   }
959   /*
960     Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
961     for ILU(0) factorization with natural ordering
962   */
963   switch (a->rmap.bs) {
964   case 1:
965     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
966     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
967     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
968     ierr = PetscInfo((inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
969   case 2:
970     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
971     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
972     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
973     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
974     break;
975   case 3:
976      inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
977      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
978      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
979      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
980      break;
981   case 4:
982     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
983     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
984     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
985     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
986     break;
987   case 5:
988     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
989     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
990     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
991     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
992     break;
993   case 6:
994     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
995     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
996     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
997     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
998     break;
999   case 7:
1000     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1001     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1002     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1003     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
1004     break;
1005   default:
1006     a->row        = row;
1007     a->icol       = col;
1008     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1009     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1010 
1011     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1012     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1013     ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1014 
1015     if (!a->solve_work) {
1016       ierr = PetscMalloc((A->rmap.N+a->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1017       ierr = PetscLogObjectMemory(inA,(A->rmap.N+a->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1018     }
1019   }
1020 
1021   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 #endif
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1028 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1029 {
1030   static PetscTruth called = PETSC_FALSE;
1031   MPI_Comm          comm = A->comm;
1032   PetscErrorCode    ierr;
1033 
1034   PetscFunctionBegin;
1035   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1036   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1037   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1038   ierr = (*PetscHelpPrintf)(comm,"  -mat_ignore_lower_triangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr);
1039   ierr = (*PetscHelpPrintf)(comm,"  -mat_getrow_uppertriangular: Enable MatGetRow() for retrieving the upper triangular part of the row\n");CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 EXTERN_C_BEGIN
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1046 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1047 {
1048   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1049   PetscInt     i,nz,n;
1050 
1051   PetscFunctionBegin;
1052   nz = baij->maxnz;
1053   n  = mat->cmap.n;
1054   for (i=0; i<nz; i++) {
1055     baij->j[i] = indices[i];
1056   }
1057    baij->nz = nz;
1058    for (i=0; i<n; i++) {
1059      baij->ilen[i] = baij->imax[i];
1060    }
1061    PetscFunctionReturn(0);
1062 }
1063 EXTERN_C_END
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1067 /*@
1068   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1069   in the matrix.
1070 
1071   Input Parameters:
1072   +  mat     - the SeqSBAIJ matrix
1073   -  indices - the column indices
1074 
1075   Level: advanced
1076 
1077   Notes:
1078   This can be called if you have precomputed the nonzero structure of the
1079   matrix and want to provide it to the matrix object to improve the performance
1080   of the MatSetValues() operation.
1081 
1082   You MUST have set the correct numbers of nonzeros per row in the call to
1083   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1084 
1085   MUST be called before any calls to MatSetValues()
1086 
1087   .seealso: MatCreateSeqSBAIJ
1088 @*/
1089 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1090 {
1091   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1092 
1093   PetscFunctionBegin;
1094   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1095   PetscValidPointer(indices,2);
1096   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1097   if (f) {
1098     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1099   } else {
1100     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1101   }
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1107 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1108 {
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   /* If the two matrices have the same copy implementation, use fast copy. */
1113   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1114     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1115     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1116 
1117     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1118       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1119     }
1120     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1121   } else {
1122     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1123     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1124     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1131 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1132 {
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1142 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1143 {
1144   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1145   PetscFunctionBegin;
1146   *array = a->a;
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1152 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1153 {
1154   PetscFunctionBegin;
1155   PetscFunctionReturn(0);
1156  }
1157 
1158 #include "petscblaslapack.h"
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1161 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1162 {
1163   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1164   PetscErrorCode ierr;
1165   PetscInt       i,bs=Y->rmap.bs,bs2,j;
1166   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1167 
1168   PetscFunctionBegin;
1169   if (str == SAME_NONZERO_PATTERN) {
1170     PetscScalar alpha = a;
1171     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1172   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1173     if (y->xtoy && y->XtoY != X) {
1174       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1175       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1176     }
1177     if (!y->xtoy) { /* get xtoy */
1178       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1179       y->XtoY = X;
1180     }
1181     bs2 = bs*bs;
1182     for (i=0; i<x->nz; i++) {
1183       j = 0;
1184       while (j < bs2){
1185         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1186         j++;
1187       }
1188     }
1189     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1190   } else {
1191     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1192     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1193     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1200 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1201 {
1202   PetscFunctionBegin;
1203   *flg = PETSC_TRUE;
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1209 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1210 {
1211    PetscFunctionBegin;
1212    *flg = PETSC_TRUE;
1213    PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1218 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1219  {
1220    PetscFunctionBegin;
1221    *flg = PETSC_FALSE;
1222    PetscFunctionReturn(0);
1223  }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1227 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1228 {
1229   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1230   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1231   PetscScalar    *aa = a->a;
1232 
1233   PetscFunctionBegin;
1234   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1240 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1241 {
1242   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1243   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1244   PetscScalar    *aa = a->a;
1245 
1246   PetscFunctionBegin;
1247   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 /* -------------------------------------------------------------------*/
1252 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1253        MatGetRow_SeqSBAIJ,
1254        MatRestoreRow_SeqSBAIJ,
1255        MatMult_SeqSBAIJ_N,
1256 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1257        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1258        MatMultAdd_SeqSBAIJ_N,
1259        MatSolve_SeqSBAIJ_N,
1260        0,
1261        0,
1262 /*10*/ 0,
1263        0,
1264        MatCholeskyFactor_SeqSBAIJ,
1265        MatRelax_SeqSBAIJ,
1266        MatTranspose_SeqSBAIJ,
1267 /*15*/ MatGetInfo_SeqSBAIJ,
1268        MatEqual_SeqSBAIJ,
1269        MatGetDiagonal_SeqSBAIJ,
1270        MatDiagonalScale_SeqSBAIJ,
1271        MatNorm_SeqSBAIJ,
1272 /*20*/ 0,
1273        MatAssemblyEnd_SeqSBAIJ,
1274        0,
1275        MatSetOption_SeqSBAIJ,
1276        MatZeroEntries_SeqSBAIJ,
1277 /*25*/ 0,
1278        0,
1279        0,
1280        MatCholeskyFactorSymbolic_SeqSBAIJ,
1281        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1282 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1283        0,
1284        MatICCFactorSymbolic_SeqSBAIJ,
1285        MatGetArray_SeqSBAIJ,
1286        MatRestoreArray_SeqSBAIJ,
1287 /*35*/ MatDuplicate_SeqSBAIJ,
1288        0,
1289        0,
1290        0,
1291        0,
1292 /*40*/ MatAXPY_SeqSBAIJ,
1293        MatGetSubMatrices_SeqSBAIJ,
1294        MatIncreaseOverlap_SeqSBAIJ,
1295        MatGetValues_SeqSBAIJ,
1296        MatCopy_SeqSBAIJ,
1297 /*45*/ MatPrintHelp_SeqSBAIJ,
1298        MatScale_SeqSBAIJ,
1299        0,
1300        0,
1301        0,
1302 /*50*/ 0,
1303        MatGetRowIJ_SeqSBAIJ,
1304        MatRestoreRowIJ_SeqSBAIJ,
1305        0,
1306        0,
1307 /*55*/ 0,
1308        0,
1309        0,
1310        0,
1311        MatSetValuesBlocked_SeqSBAIJ,
1312 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1313        0,
1314        0,
1315        0,
1316        0,
1317 /*65*/ 0,
1318        0,
1319        0,
1320        0,
1321        0,
1322 /*70*/ MatGetRowMax_SeqSBAIJ,
1323        0,
1324        0,
1325        0,
1326        0,
1327 /*75*/ 0,
1328        0,
1329        0,
1330        0,
1331        0,
1332 /*80*/ 0,
1333        0,
1334        0,
1335 #if !defined(PETSC_USE_COMPLEX)
1336        MatGetInertia_SeqSBAIJ,
1337 #else
1338        0,
1339 #endif
1340        MatLoad_SeqSBAIJ,
1341 /*85*/ MatIsSymmetric_SeqSBAIJ,
1342        MatIsHermitian_SeqSBAIJ,
1343        MatIsStructurallySymmetric_SeqSBAIJ,
1344        0,
1345        0,
1346 /*90*/ 0,
1347        0,
1348        0,
1349        0,
1350        0,
1351 /*95*/ 0,
1352        0,
1353        0,
1354        0,
1355        0,
1356 /*100*/0,
1357        0,
1358        0,
1359        0,
1360        0,
1361 /*105*/0,
1362        MatRealPart_SeqSBAIJ,
1363        MatImaginaryPart_SeqSBAIJ,
1364        MatGetRowUpperTriangular_SeqSBAIJ,
1365        MatRestoreRowUpperTriangular_SeqSBAIJ
1366 };
1367 
1368 EXTERN_C_BEGIN
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1371 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1372 {
1373   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1374   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   if (aij->nonew != 1) {
1379     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1380   }
1381 
1382   /* allocate space for values if not already there */
1383   if (!aij->saved_values) {
1384     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1385   }
1386 
1387   /* copy values over */
1388   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 EXTERN_C_END
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1396 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1397 {
1398   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1399   PetscErrorCode ierr;
1400   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
1401 
1402   PetscFunctionBegin;
1403   if (aij->nonew != 1) {
1404     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1405   }
1406   if (!aij->saved_values) {
1407     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1408   }
1409 
1410   /* copy values over */
1411   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 EXTERN_C_END
1415 
1416 EXTERN_C_BEGIN
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1419 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1420 {
1421   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1422   PetscErrorCode ierr;
1423   PetscInt       i,mbs,bs2;
1424   PetscTruth     skipallocation = PETSC_FALSE,flg;
1425 
1426   PetscFunctionBegin;
1427   B->preallocated = PETSC_TRUE;
1428   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1429   B->rmap.bs = B->cmap.bs = bs;
1430   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1431   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1432 
1433   mbs  = B->rmap.N/bs;
1434   bs2  = bs*bs;
1435 
1436   if (mbs*bs != B->rmap.N) {
1437     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1438   }
1439 
1440   if (nz == MAT_SKIP_ALLOCATION) {
1441     skipallocation = PETSC_TRUE;
1442     nz             = 0;
1443   }
1444 
1445   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1446   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1447   if (nnz) {
1448     for (i=0; i<mbs; i++) {
1449       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1450       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1451     }
1452   }
1453 
1454   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1455   if (!flg) {
1456     switch (bs) {
1457     case 1:
1458       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1459       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1460       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1461       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1462       B->ops->mult             = MatMult_SeqSBAIJ_1;
1463       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1464       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1465       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1466       break;
1467     case 2:
1468       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1469       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1470       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1471       B->ops->mult             = MatMult_SeqSBAIJ_2;
1472       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1473       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1474       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1475       break;
1476     case 3:
1477       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1478       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1479       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1480       B->ops->mult             = MatMult_SeqSBAIJ_3;
1481       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1482       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1483       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1484       break;
1485     case 4:
1486       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1487       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1488       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1489       B->ops->mult             = MatMult_SeqSBAIJ_4;
1490       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1491       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1492       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1493       break;
1494     case 5:
1495       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1496       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1497       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1498       B->ops->mult             = MatMult_SeqSBAIJ_5;
1499       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1500       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1501       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1502       break;
1503     case 6:
1504       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1505       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1506       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1507       B->ops->mult             = MatMult_SeqSBAIJ_6;
1508       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1509       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1510       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1511       break;
1512     case 7:
1513       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1514       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1515       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1516       B->ops->mult             = MatMult_SeqSBAIJ_7;
1517       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1518       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1519       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1520       break;
1521     }
1522   }
1523 
1524   b->mbs = mbs;
1525   b->nbs = mbs;
1526   if (!skipallocation) {
1527     /* b->ilen will count nonzeros in each block row so far. */
1528     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1529     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1530     if (!nnz) {
1531       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1532       else if (nz <= 0)        nz = 1;
1533       for (i=0; i<mbs; i++) {
1534         b->imax[i] = nz;
1535       }
1536       nz = nz*mbs; /* total nz */
1537     } else {
1538       nz = 0;
1539       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1540     }
1541     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1542 
1543     /* allocate the matrix space */
1544     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
1545     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1546     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1547     b->singlemalloc = PETSC_TRUE;
1548 
1549     /* pointer to beginning of each row */
1550     b->i[0] = 0;
1551     for (i=1; i<mbs+1; i++) {
1552       b->i[i] = b->i[i-1] + b->imax[i-1];
1553     }
1554   }
1555 
1556   B->rmap.bs               = bs;
1557   b->bs2              = bs2;
1558   b->nz             = 0;
1559   b->maxnz          = nz*bs2;
1560 
1561   b->inew             = 0;
1562   b->jnew             = 0;
1563   b->anew             = 0;
1564   b->a2anew           = 0;
1565   b->permute          = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1572 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1573 EXTERN_C_END
1574 
1575 /*MC
1576   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1577   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1578 
1579   Options Database Keys:
1580   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1581 
1582   Level: beginner
1583 
1584   .seealso: MatCreateSeqSBAIJ
1585 M*/
1586 
1587 EXTERN_C_BEGIN
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1590 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1591 {
1592   Mat_SeqSBAIJ   *b;
1593   PetscErrorCode ierr;
1594   PetscMPIInt    size;
1595   PetscTruth     flg;
1596 
1597   PetscFunctionBegin;
1598   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1599   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1600 
1601   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1602   B->data = (void*)b;
1603   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1604   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1605   B->ops->view        = MatView_SeqSBAIJ;
1606   B->factor           = 0;
1607   B->mapping          = 0;
1608   b->row              = 0;
1609   b->icol             = 0;
1610   b->reallocs         = 0;
1611   b->saved_values     = 0;
1612 
1613 
1614   b->sorted           = PETSC_FALSE;
1615   b->roworiented      = PETSC_TRUE;
1616   b->nonew            = 0;
1617   b->diag             = 0;
1618   b->solve_work       = 0;
1619   b->mult_work        = 0;
1620   B->spptr            = 0;
1621   b->keepzeroedrows   = PETSC_FALSE;
1622   b->xtoy             = 0;
1623   b->XtoY             = 0;
1624 
1625   b->inew             = 0;
1626   b->jnew             = 0;
1627   b->anew             = 0;
1628   b->a2anew           = 0;
1629   b->permute          = PETSC_FALSE;
1630 
1631   b->ignore_ltriangular = PETSC_FALSE;
1632   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1633   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1634 
1635   b->getrow_utriangular = PETSC_FALSE;
1636   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1637   if (flg) b->getrow_utriangular = PETSC_TRUE;
1638 
1639   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1640                                      "MatStoreValues_SeqSBAIJ",
1641                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1643                                      "MatRetrieveValues_SeqSBAIJ",
1644                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1646                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1647                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1649                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1650                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1652                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1653                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1655                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1656                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1657 
1658   B->symmetric                  = PETSC_TRUE;
1659   B->structurally_symmetric     = PETSC_TRUE;
1660   B->symmetric_set              = PETSC_TRUE;
1661   B->structurally_symmetric_set = PETSC_TRUE;
1662   PetscFunctionReturn(0);
1663 }
1664 EXTERN_C_END
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1668 /*@C
1669    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1670    compressed row) format.  For good matrix assembly performance the
1671    user should preallocate the matrix storage by setting the parameter nz
1672    (or the array nnz).  By setting these parameters accurately, performance
1673    during matrix assembly can be increased by more than a factor of 50.
1674 
1675    Collective on Mat
1676 
1677    Input Parameters:
1678 +  A - the symmetric matrix
1679 .  bs - size of block
1680 .  nz - number of block nonzeros per block row (same for all rows)
1681 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1682          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1683 
1684    Options Database Keys:
1685 .   -mat_no_unroll - uses code that does not unroll the loops in the
1686                      block calculations (much slower)
1687 .    -mat_block_size - size of the blocks to use
1688 
1689    Level: intermediate
1690 
1691    Notes:
1692    Specify the preallocated storage with either nz or nnz (not both).
1693    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1694    allocation.  For additional details, see the users manual chapter on
1695    matrices.
1696 
1697    If the nnz parameter is given then the nz parameter is ignored
1698 
1699 
1700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1701 @*/
1702 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1703 {
1704   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1708   if (f) {
1709     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1710   }
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatCreateSeqSBAIJ"
1716 /*@C
1717    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1718    compressed row) format.  For good matrix assembly performance the
1719    user should preallocate the matrix storage by setting the parameter nz
1720    (or the array nnz).  By setting these parameters accurately, performance
1721    during matrix assembly can be increased by more than a factor of 50.
1722 
1723    Collective on MPI_Comm
1724 
1725    Input Parameters:
1726 +  comm - MPI communicator, set to PETSC_COMM_SELF
1727 .  bs - size of block
1728 .  m - number of rows, or number of columns
1729 .  nz - number of block nonzeros per block row (same for all rows)
1730 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1731          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1732 
1733    Output Parameter:
1734 .  A - the symmetric matrix
1735 
1736    Options Database Keys:
1737 .   -mat_no_unroll - uses code that does not unroll the loops in the
1738                      block calculations (much slower)
1739 .    -mat_block_size - size of the blocks to use
1740 
1741    Level: intermediate
1742 
1743    Notes:
1744 
1745    Specify the preallocated storage with either nz or nnz (not both).
1746    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1747    allocation.  For additional details, see the users manual chapter on
1748    matrices.
1749 
1750    If the nnz parameter is given then the nz parameter is ignored
1751 
1752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1753 @*/
1754 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1755 {
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1760   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1761   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1762   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1768 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1769 {
1770   Mat            C;
1771   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1772   PetscErrorCode ierr;
1773   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1774 
1775   PetscFunctionBegin;
1776   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1777 
1778   *B = 0;
1779   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1780   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
1781   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1782   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1783   c    = (Mat_SeqSBAIJ*)C->data;
1784 
1785   C->preallocated   = PETSC_TRUE;
1786   C->factor         = A->factor;
1787   c->row            = 0;
1788   c->icol           = 0;
1789   c->saved_values   = 0;
1790   c->keepzeroedrows = a->keepzeroedrows;
1791   C->assembled      = PETSC_TRUE;
1792 
1793   ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
1794   ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
1795   c->bs2  = a->bs2;
1796   c->mbs  = a->mbs;
1797   c->nbs  = a->nbs;
1798 
1799   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
1800   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
1801   for (i=0; i<mbs; i++) {
1802     c->imax[i] = a->imax[i];
1803     c->ilen[i] = a->ilen[i];
1804   }
1805 
1806   /* allocate the matrix space */
1807   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1808   c->singlemalloc = PETSC_TRUE;
1809   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1810   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1811   if (mbs > 0) {
1812     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1813     if (cpvalues == MAT_COPY_VALUES) {
1814       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1815     } else {
1816       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1817     }
1818   }
1819 
1820   c->sorted      = a->sorted;
1821   c->roworiented = a->roworiented;
1822   c->nonew       = a->nonew;
1823 
1824   if (a->diag) {
1825     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1826     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1827     for (i=0; i<mbs; i++) {
1828       c->diag[i] = a->diag[i];
1829     }
1830   } else c->diag        = 0;
1831   c->nz               = a->nz;
1832   c->maxnz            = a->maxnz;
1833   c->solve_work         = 0;
1834   c->mult_work          = 0;
1835   *B = C;
1836   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1842 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1843 {
1844   Mat_SeqSBAIJ   *a;
1845   Mat            B;
1846   PetscErrorCode ierr;
1847   int            fd;
1848   PetscMPIInt    size;
1849   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1850   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1851   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1852   PetscInt       *masked,nmask,tmp,bs2,ishift;
1853   PetscScalar    *aa;
1854   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1855 
1856   PetscFunctionBegin;
1857   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1858   bs2  = bs*bs;
1859 
1860   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1861   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1862   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1863   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1864   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1865   M = header[1]; N = header[2]; nz = header[3];
1866 
1867   if (header[3] < 0) {
1868     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1869   }
1870 
1871   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1872 
1873   /*
1874      This code adds extra rows to make sure the number of rows is
1875     divisible by the blocksize
1876   */
1877   mbs        = M/bs;
1878   extra_rows = bs - M + bs*(mbs);
1879   if (extra_rows == bs) extra_rows = 0;
1880   else                  mbs++;
1881   if (extra_rows) {
1882     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1883   }
1884 
1885   /* read in row lengths */
1886   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1887   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1888   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1889 
1890   /* read in column indices */
1891   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1892   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1893   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1894 
1895   /* loop over row lengths determining block row lengths */
1896   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1897   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1898   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1899   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1900   masked   = mask + mbs;
1901   rowcount = 0; nzcount = 0;
1902   for (i=0; i<mbs; i++) {
1903     nmask = 0;
1904     for (j=0; j<bs; j++) {
1905       kmax = rowlengths[rowcount];
1906       for (k=0; k<kmax; k++) {
1907         tmp = jj[nzcount++]/bs;   /* block col. index */
1908         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1909       }
1910       rowcount++;
1911     }
1912     s_browlengths[i] += nmask;
1913 
1914     /* zero out the mask elements we set */
1915     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1916   }
1917 
1918   /* create our matrix */
1919   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1920   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1921   ierr = MatSetType(B,type);CHKERRQ(ierr);
1922   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1923   a = (Mat_SeqSBAIJ*)B->data;
1924 
1925   /* set matrix "i" values */
1926   a->i[0] = 0;
1927   for (i=1; i<= mbs; i++) {
1928     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1929     a->ilen[i-1] = s_browlengths[i-1];
1930   }
1931   a->nz = a->i[mbs];
1932 
1933   /* read in nonzero values */
1934   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1935   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1936   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1937 
1938   /* set "a" and "j" values into matrix */
1939   nzcount = 0; jcount = 0;
1940   for (i=0; i<mbs; i++) {
1941     nzcountb = nzcount;
1942     nmask    = 0;
1943     for (j=0; j<bs; j++) {
1944       kmax = rowlengths[i*bs+j];
1945       for (k=0; k<kmax; k++) {
1946         tmp = jj[nzcount++]/bs; /* block col. index */
1947         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1948       }
1949     }
1950     /* sort the masked values */
1951     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1952 
1953     /* set "j" values into matrix */
1954     maskcount = 1;
1955     for (j=0; j<nmask; j++) {
1956       a->j[jcount++]  = masked[j];
1957       mask[masked[j]] = maskcount++;
1958     }
1959 
1960     /* set "a" values into matrix */
1961     ishift = bs2*a->i[i];
1962     for (j=0; j<bs; j++) {
1963       kmax = rowlengths[i*bs+j];
1964       for (k=0; k<kmax; k++) {
1965         tmp       = jj[nzcountb]/bs ; /* block col. index */
1966         if (tmp >= i){
1967           block     = mask[tmp] - 1;
1968           point     = jj[nzcountb] - bs*tmp;
1969           idx       = ishift + bs2*block + j + bs*point;
1970           a->a[idx] = aa[nzcountb];
1971         }
1972         nzcountb++;
1973       }
1974     }
1975     /* zero out the mask elements we set */
1976     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1977   }
1978   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1979 
1980   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1981   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1982   ierr = PetscFree(aa);CHKERRQ(ierr);
1983   ierr = PetscFree(jj);CHKERRQ(ierr);
1984   ierr = PetscFree(mask);CHKERRQ(ierr);
1985 
1986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   ierr = MatView_Private(B);CHKERRQ(ierr);
1989   *A = B;
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1995 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1996 {
1997   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1998   MatScalar      *aa=a->a,*v,*v1;
1999   PetscScalar    *x,*b,*t,sum,d;
2000   PetscErrorCode ierr;
2001   PetscInt       m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j;
2002   PetscInt       nz,nz1,*vj,*vj1,i;
2003 
2004   PetscFunctionBegin;
2005   its = its*lits;
2006   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2007 
2008   if (bs > 1)
2009     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2010 
2011   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2012   if (xx != bb) {
2013     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2014   } else {
2015     b = x;
2016   }
2017 
2018   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2019 
2020   if (flag & SOR_ZERO_INITIAL_GUESS) {
2021     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2022       for (i=0; i<m; i++)
2023         t[i] = b[i];
2024 
2025       for (i=0; i<m; i++){
2026         d  = *(aa + ai[i]);  /* diag[i] */
2027         v  = aa + ai[i] + 1;
2028         vj = aj + ai[i] + 1;
2029         nz = ai[i+1] - ai[i] - 1;
2030         x[i] = omega*t[i]/d;
2031         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2032         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2033       }
2034     }
2035 
2036     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2037       for (i=0; i<m; i++)
2038         t[i] = b[i];
2039 
2040       for (i=0; i<m-1; i++){  /* update rhs */
2041         v  = aa + ai[i] + 1;
2042         vj = aj + ai[i] + 1;
2043         nz = ai[i+1] - ai[i] - 1;
2044         while (nz--) t[*vj++] -= x[i]*(*v++);
2045         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2046       }
2047       for (i=m-1; i>=0; i--){
2048         d  = *(aa + ai[i]);
2049         v  = aa + ai[i] + 1;
2050         vj = aj + ai[i] + 1;
2051         nz = ai[i+1] - ai[i] - 1;
2052         sum = t[i];
2053         while (nz--) sum -= x[*vj++]*(*v++);
2054         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2055         x[i] =   (1-omega)*x[i] + omega*sum/d;
2056       }
2057     }
2058     its--;
2059   }
2060 
2061   while (its--) {
2062     /*
2063        forward sweep:
2064        for i=0,...,m-1:
2065          sum[i] = (b[i] - U(i,:)x )/d[i];
2066          x[i]   = (1-omega)x[i] + omega*sum[i];
2067          b      = b - x[i]*U^T(i,:);
2068 
2069     */
2070     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2071       for (i=0; i<m; i++)
2072         t[i] = b[i];
2073 
2074       for (i=0; i<m; i++){
2075         d  = *(aa + ai[i]);  /* diag[i] */
2076         v  = aa + ai[i] + 1; v1=v;
2077         vj = aj + ai[i] + 1; vj1=vj;
2078         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2079         sum = t[i];
2080         while (nz1--) sum -= (*v1++)*x[*vj1++];
2081         x[i] = (1-omega)*x[i] + omega*sum/d;
2082         while (nz--) t[*vj++] -= x[i]*(*v++);
2083         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2084       }
2085     }
2086 
2087   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2088       /*
2089        backward sweep:
2090        b = b - x[i]*U^T(i,:), i=0,...,n-2
2091        for i=m-1,...,0:
2092          sum[i] = (b[i] - U(i,:)x )/d[i];
2093          x[i]   = (1-omega)x[i] + omega*sum[i];
2094       */
2095       for (i=0; i<m; i++)
2096         t[i] = b[i];
2097 
2098       for (i=0; i<m-1; i++){  /* update rhs */
2099         v  = aa + ai[i] + 1;
2100         vj = aj + ai[i] + 1;
2101         nz = ai[i+1] - ai[i] - 1;
2102         while (nz--) t[*vj++] -= x[i]*(*v++);
2103         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2104       }
2105       for (i=m-1; i>=0; i--){
2106         d  = *(aa + ai[i]);
2107         v  = aa + ai[i] + 1;
2108         vj = aj + ai[i] + 1;
2109         nz = ai[i+1] - ai[i] - 1;
2110         sum = t[i];
2111         while (nz--) sum -= x[*vj++]*(*v++);
2112         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2113         x[i] =   (1-omega)*x[i] + omega*sum/d;
2114       }
2115     }
2116   }
2117 
2118   ierr = PetscFree(t);CHKERRQ(ierr);
2119   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2120   if (bb != xx) {
2121     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 
2127 
2128 
2129 
2130 
2131