xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision ae15b995b5732fffd2de5a75cf61ef7190c6fef1)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/inline/spops.h"
9 #include "src/mat/impls/sbaij/seq/sbaij.h"
10 
11 #define CHUNKSIZE  10
12 
13 /*
14      Checks for missing diagonals
15 */
16 #undef __FUNCT__
17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
19 {
20   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       *diag,*jj = a->j,i;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
38   PetscErrorCode ierr;
39   PetscInt       i,mbs = a->mbs;
40 
41   PetscFunctionBegin;
42   if (a->diag) PetscFunctionReturn(0);
43 
44   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
45   ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
46   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNCT__
51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
53 {
54   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
55   PetscInt     n = a->mbs,i;
56 
57   PetscFunctionBegin;
58   *nn = n;
59   if (!ia) PetscFunctionReturn(0);
60 
61   if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     PetscInt nz = a->i[n];
64     for (i=0; i<nz; i++) a->j[i]++;
65     for (i=0; i<n+1; i++) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
76 {
77   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
78   PetscInt     i,n = a->mbs;
79 
80   PetscFunctionBegin;
81   if (!ia) PetscFunctionReturn(0);
82 
83   if (oshift == 1) {
84     PetscInt nz = a->i[n]-1;
85     for (i=0; i<nz; i++) a->j[i]--;
86     for (i=0; i<n+1; i++) a->i[i]--;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
94 {
95   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99 #if defined(PETSC_USE_LOG)
100   PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
101 #endif
102   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
103   if (a->row) {
104     ierr = ISDestroy(a->row);CHKERRQ(ierr);
105   }
106   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
107   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
108   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
109   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
110   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
111   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
112   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
113   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
114 
115   if (a->inew){
116     ierr = PetscFree(a->inew);CHKERRQ(ierr);
117     a->inew = 0;
118   }
119   ierr = PetscFree(a);CHKERRQ(ierr);
120 
121   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
122   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
123   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
124   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
125   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr);
126   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
132 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
133 {
134   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
135   PetscErrorCode ierr;
136 
137   PetscFunctionBegin;
138   switch (op) {
139   case MAT_ROW_ORIENTED:
140     a->roworiented = PETSC_TRUE;
141     break;
142   case MAT_COLUMN_ORIENTED:
143     a->roworiented = PETSC_FALSE;
144     break;
145   case MAT_COLUMNS_SORTED:
146     a->sorted = PETSC_TRUE;
147     break;
148   case MAT_COLUMNS_UNSORTED:
149     a->sorted = PETSC_FALSE;
150     break;
151   case MAT_KEEP_ZEROED_ROWS:
152     a->keepzeroedrows = PETSC_TRUE;
153     break;
154   case MAT_NO_NEW_NONZERO_LOCATIONS:
155     a->nonew = 1;
156     break;
157   case MAT_NEW_NONZERO_LOCATION_ERR:
158     a->nonew = -1;
159     break;
160   case MAT_NEW_NONZERO_ALLOCATION_ERR:
161     a->nonew = -2;
162     break;
163   case MAT_YES_NEW_NONZERO_LOCATIONS:
164     a->nonew = 0;
165     break;
166   case MAT_ROWS_SORTED:
167   case MAT_ROWS_UNSORTED:
168   case MAT_YES_NEW_DIAGONALS:
169   case MAT_IGNORE_OFF_PROC_ENTRIES:
170   case MAT_USE_HASH_TABLE:
171     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
172     break;
173   case MAT_NO_NEW_DIAGONALS:
174     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
175   case MAT_NOT_SYMMETRIC:
176   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
177   case MAT_HERMITIAN:
178     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
179   case MAT_SYMMETRIC:
180   case MAT_STRUCTURALLY_SYMMETRIC:
181   case MAT_NOT_HERMITIAN:
182   case MAT_SYMMETRY_ETERNAL:
183   case MAT_NOT_SYMMETRY_ETERNAL:
184   case MAT_IGNORE_LOWER_TRIANGULAR:
185     a->ignore_ltriangular = PETSC_TRUE;
186     break;
187   case MAT_ERROR_LOWER_TRIANGULAR:
188     a->ignore_ltriangular = PETSC_FALSE;
189     break;
190   case MAT_GETROW_UPPERTRIANGULAR:
191     a->getrow_utriangular = PETSC_TRUE;
192     break;
193   default:
194     SETERRQ(PETSC_ERR_SUP,"unknown option");
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
201 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
202 {
203   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
204   PetscErrorCode ierr;
205   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
206   MatScalar      *aa,*aa_i;
207   PetscScalar    *v_i;
208 
209   PetscFunctionBegin;
210   if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()");
211   /* Get the upper triangular part of the row */
212   bs  = A->bs;
213   ai  = a->i;
214   aj  = a->j;
215   aa  = a->a;
216   bs2 = a->bs2;
217 
218   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
219 
220   bn  = row/bs;   /* Block number */
221   bp  = row % bs; /* Block position */
222   M   = ai[bn+1] - ai[bn];
223   *ncols = bs*M;
224 
225   if (v) {
226     *v = 0;
227     if (*ncols) {
228       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
229       for (i=0; i<M; i++) { /* for each block in the block row */
230         v_i  = *v + i*bs;
231         aa_i = aa + bs2*(ai[bn] + i);
232         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
233       }
234     }
235   }
236 
237   if (cols) {
238     *cols = 0;
239     if (*ncols) {
240       ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr);
241       for (i=0; i<M; i++) { /* for each block in the block row */
242         cols_i = *cols + i*bs;
243         itmp  = bs*aj[ai[bn] + i];
244         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
245       }
246     }
247   }
248 
249   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
250   /* this segment is currently removed, so only entries in the upper triangle are obtained */
251 #ifdef column_search
252   v_i    = *v    + M*bs;
253   cols_i = *cols + M*bs;
254   for (i=0; i<bn; i++){ /* for each block row */
255     M = ai[i+1] - ai[i];
256     for (j=0; j<M; j++){
257       itmp = aj[ai[i] + j];    /* block column value */
258       if (itmp == bn){
259         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
260         for (k=0; k<bs; k++) {
261           *cols_i++ = i*bs+k;
262           *v_i++    = aa_i[k];
263         }
264         *ncols += bs;
265         break;
266       }
267     }
268   }
269 #endif
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
275 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
276 {
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
281   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ"
287 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
288 {
289   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
290 
291   PetscFunctionBegin;
292   a->getrow_utriangular = PETSC_TRUE;
293   PetscFunctionReturn(0);
294 }
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ"
297 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
298 {
299   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
300 
301   PetscFunctionBegin;
302   a->getrow_utriangular = PETSC_FALSE;
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
308 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
309 {
310   PetscErrorCode ierr;
311   PetscFunctionBegin;
312   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
318 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
319 {
320   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
321   PetscErrorCode    ierr;
322   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
323   const char        *name;
324   PetscViewerFormat format;
325 
326   PetscFunctionBegin;
327   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
328   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
329   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
330     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
331   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
332     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
333   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
334     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
335     for (i=0; i<a->mbs; i++) {
336       for (j=0; j<bs; j++) {
337         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
338         for (k=a->i[i]; k<a->i[i+1]; k++) {
339           for (l=0; l<bs; l++) {
340 #if defined(PETSC_USE_COMPLEX)
341             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
342               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
343                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
344             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
345               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
346                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
347             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
348               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
349             }
350 #else
351             if (a->a[bs2*k + l*bs + j] != 0.0) {
352               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
353             }
354 #endif
355           }
356         }
357         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
358       }
359     }
360     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
362      PetscFunctionReturn(0);
363   } else {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
373                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
376                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else {
378               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             ierr = PetscViewerASCIIPrintf(viewer," %D %G ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
382 #endif
383           }
384         }
385         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
386       }
387     }
388     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
389   }
390   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
391   PetscFunctionReturn(0);
392 }
393 
394 #undef __FUNCT__
395 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
396 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
397 {
398   Mat            A = (Mat) Aa;
399   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
400   PetscErrorCode ierr;
401   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
402   PetscMPIInt    rank;
403   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
404   MatScalar      *aa;
405   MPI_Comm       comm;
406   PetscViewer    viewer;
407 
408   PetscFunctionBegin;
409   /*
410     This is nasty. If this is called from an originally parallel matrix
411     then all processes call this,but only the first has the matrix so the
412     rest should return immediately.
413   */
414   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
416   if (rank) PetscFunctionReturn(0);
417 
418   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
419 
420   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
421   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
422 
423   /* loop over matrix elements drawing boxes */
424   color = PETSC_DRAW_BLUE;
425   for (i=0,row=0; i<mbs; i++,row+=bs) {
426     for (j=a->i[i]; j<a->i[i+1]; j++) {
427       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
428       x_l = a->j[j]*bs; x_r = x_l + 1.0;
429       aa = a->a + j*bs2;
430       for (k=0; k<bs; k++) {
431         for (l=0; l<bs; l++) {
432           if (PetscRealPart(*aa++) >=  0.) continue;
433           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
434         }
435       }
436     }
437   }
438   color = PETSC_DRAW_CYAN;
439   for (i=0,row=0; i<mbs; i++,row+=bs) {
440     for (j=a->i[i]; j<a->i[i+1]; j++) {
441       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
442       x_l = a->j[j]*bs; x_r = x_l + 1.0;
443       aa = a->a + j*bs2;
444       for (k=0; k<bs; k++) {
445         for (l=0; l<bs; l++) {
446           if (PetscRealPart(*aa++) != 0.) continue;
447           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
448         }
449       }
450     }
451   }
452 
453   color = PETSC_DRAW_RED;
454   for (i=0,row=0; i<mbs; i++,row+=bs) {
455     for (j=a->i[i]; j<a->i[i+1]; j++) {
456       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
457       x_l = a->j[j]*bs; x_r = x_l + 1.0;
458       aa = a->a + j*bs2;
459       for (k=0; k<bs; k++) {
460         for (l=0; l<bs; l++) {
461           if (PetscRealPart(*aa++) <= 0.) continue;
462           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
463         }
464       }
465     }
466   }
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
472 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
473 {
474   PetscErrorCode ierr;
475   PetscReal      xl,yl,xr,yr,w,h;
476   PetscDraw      draw;
477   PetscTruth     isnull;
478 
479   PetscFunctionBegin;
480 
481   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
482   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
483 
484   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
485   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
486   xr += w;    yr += h;  xl = -w;     yl = -h;
487   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
488   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
489   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatView_SeqSBAIJ"
495 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
496 {
497   PetscErrorCode ierr;
498   PetscTruth     iascii,isdraw;
499 
500   PetscFunctionBegin;
501   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
502   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
503   if (iascii){
504     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
505   } else if (isdraw) {
506     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
507   } else {
508     Mat B;
509     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
510     ierr = MatView(B,viewer);CHKERRQ(ierr);
511     ierr = MatDestroy(B);CHKERRQ(ierr);
512   }
513   PetscFunctionReturn(0);
514 }
515 
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
519 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
520 {
521   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
522   PetscInt     *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
523   PetscInt     *ai = a->i,*ailen = a->ilen;
524   PetscInt     brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
525   MatScalar    *ap,*aa = a->a,zero = 0.0;
526 
527   PetscFunctionBegin;
528   for (k=0; k<m; k++) { /* loop over rows */
529     row  = im[k]; brow = row/bs;
530     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
531     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
532     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
533     nrow = ailen[brow];
534     for (l=0; l<n; l++) { /* loop over columns */
535       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
536       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
537       col  = in[l] ;
538       bcol = col/bs;
539       cidx = col%bs;
540       ridx = row%bs;
541       high = nrow;
542       low  = 0; /* assume unsorted */
543       while (high-low > 5) {
544         t = (low+high)/2;
545         if (rp[t] > bcol) high = t;
546         else             low  = t;
547       }
548       for (i=low; i<high; i++) {
549         if (rp[i] > bcol) break;
550         if (rp[i] == bcol) {
551           *v++ = ap[bs2*i+bs*cidx+ridx];
552           goto finished;
553         }
554       }
555       *v++ = zero;
556        finished:;
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
565 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
566 {
567   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
568   PetscErrorCode  ierr;
569   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
570   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
571   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
572   PetscTruth      roworiented=a->roworiented;
573   const MatScalar *value = v;
574   MatScalar       *ap,*aa = a->a,*bap;
575 
576   PetscFunctionBegin;
577   if (roworiented) {
578     stepval = (n-1)*bs;
579   } else {
580     stepval = (m-1)*bs;
581   }
582   for (k=0; k<m; k++) { /* loop over added rows */
583     row  = im[k];
584     if (row < 0) continue;
585 #if defined(PETSC_USE_DEBUG)
586     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
587 #endif
588     rp   = aj + ai[row];
589     ap   = aa + bs2*ai[row];
590     rmax = imax[row];
591     nrow = ailen[row];
592     low  = 0;
593     high = nrow;
594     for (l=0; l<n; l++) { /* loop over added columns */
595       if (in[l] < 0) continue;
596       col = in[l];
597 #if defined(PETSC_USE_DEBUG)
598       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
599 #endif
600       if (col < row) continue; /* ignore lower triangular block */
601       if (roworiented) {
602         value = v + k*(stepval+bs)*bs + l*bs;
603       } else {
604         value = v + l*(stepval+bs)*bs + k*bs;
605       }
606       if (col <= lastcol) low = 0; else high = nrow;
607       lastcol = col;
608       while (high-low > 7) {
609         t = (low+high)/2;
610         if (rp[t] > col) high = t;
611         else             low  = t;
612       }
613       for (i=low; i<high; i++) {
614         if (rp[i] > col) break;
615         if (rp[i] == col) {
616           bap  = ap +  bs2*i;
617           if (roworiented) {
618             if (is == ADD_VALUES) {
619               for (ii=0; ii<bs; ii++,value+=stepval) {
620                 for (jj=ii; jj<bs2; jj+=bs) {
621                   bap[jj] += *value++;
622                 }
623               }
624             } else {
625               for (ii=0; ii<bs; ii++,value+=stepval) {
626                 for (jj=ii; jj<bs2; jj+=bs) {
627                   bap[jj] = *value++;
628                 }
629                }
630             }
631           } else {
632             if (is == ADD_VALUES) {
633               for (ii=0; ii<bs; ii++,value+=stepval) {
634                 for (jj=0; jj<bs; jj++) {
635                   *bap++ += *value++;
636                 }
637               }
638             } else {
639               for (ii=0; ii<bs; ii++,value+=stepval) {
640                 for (jj=0; jj<bs; jj++) {
641                   *bap++  = *value++;
642                 }
643               }
644             }
645           }
646           goto noinsert2;
647         }
648       }
649       if (nonew == 1) goto noinsert2;
650       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
651       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
652       N = nrow++ - 1; high++;
653       /* shift up all the later entries in this row */
654       for (ii=N; ii>=i; ii--) {
655         rp[ii+1] = rp[ii];
656         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
657       }
658       if (N >= i) {
659         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
660       }
661       rp[i] = col;
662       bap   = ap +  bs2*i;
663       if (roworiented) {
664         for (ii=0; ii<bs; ii++,value+=stepval) {
665           for (jj=ii; jj<bs2; jj+=bs) {
666             bap[jj] = *value++;
667           }
668         }
669       } else {
670         for (ii=0; ii<bs; ii++,value+=stepval) {
671           for (jj=0; jj<bs; jj++) {
672             *bap++  = *value++;
673           }
674         }
675        }
676     noinsert2:;
677       low = i;
678     }
679     ailen[row] = nrow;
680   }
681    PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
686 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
687 {
688   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
689   PetscErrorCode ierr;
690   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
691   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
692   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
693   MatScalar      *aa = a->a,*ap;
694 
695   PetscFunctionBegin;
696   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
697 
698   if (m) rmax = ailen[0];
699   for (i=1; i<mbs; i++) {
700     /* move each row back by the amount of empty slots (fshift) before it*/
701     fshift += imax[i-1] - ailen[i-1];
702      rmax   = PetscMax(rmax,ailen[i]);
703      if (fshift) {
704        ip = aj + ai[i]; ap = aa + bs2*ai[i];
705        N = ailen[i];
706        for (j=0; j<N; j++) {
707          ip[j-fshift] = ip[j];
708          ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
709        }
710      }
711      ai[i] = ai[i-1] + ailen[i-1];
712   }
713   if (mbs) {
714     fshift += imax[mbs-1] - ailen[mbs-1];
715      ai[mbs] = ai[mbs-1] + ailen[mbs-1];
716   }
717   /* reset ilen and imax for each row */
718   for (i=0; i<mbs; i++) {
719     ailen[i] = imax[i] = ai[i+1] - ai[i];
720   }
721   a->nz = ai[mbs];
722 
723   /* diagonals may have moved, reset it */
724   if (a->diag) {
725     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
726   }
727   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->m,A->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
728   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
729   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
730   a->reallocs          = 0;
731   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
732   PetscFunctionReturn(0);
733 }
734 
735 /*
736    This function returns an array of flags which indicate the locations of contiguous
737    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
738    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
739    Assume: sizes should be long enough to hold all the values.
740 */
741 #undef __FUNCT__
742 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
743 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
744 {
745   PetscInt   i,j,k,row;
746   PetscTruth flg;
747 
748   PetscFunctionBegin;
749    for (i=0,j=0; i<n; j++) {
750      row = idx[i];
751      if (row%bs!=0) { /* Not the begining of a block */
752        sizes[j] = 1;
753        i++;
754      } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
755        sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
756        i++;
757      } else { /* Begining of the block, so check if the complete block exists */
758        flg = PETSC_TRUE;
759        for (k=1; k<bs; k++) {
760          if (row+k != idx[i+k]) { /* break in the block */
761            flg = PETSC_FALSE;
762            break;
763          }
764        }
765        if (flg) { /* No break in the bs */
766          sizes[j] = bs;
767          i+= bs;
768        } else {
769          sizes[j] = 1;
770          i++;
771        }
772      }
773    }
774    *bs_max = j;
775    PetscFunctionReturn(0);
776 }
777 
778 
779 /* Only add/insert a(i,j) with i<=j (blocks).
780    Any a(i,j) with i>j input by user is ingored.
781 */
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
785 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
786 {
787   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
788   PetscErrorCode ierr;
789   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
790   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
791   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
792   PetscInt       ridx,cidx,bs2=a->bs2;
793   MatScalar      *ap,value,*aa=a->a,*bap;
794 
795   PetscFunctionBegin;
796   for (k=0; k<m; k++) { /* loop over added rows */
797     row  = im[k];       /* row number */
798     brow = row/bs;      /* block row number */
799     if (row < 0) continue;
800 #if defined(PETSC_USE_DEBUG)
801     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
802 #endif
803     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
804     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
805     rmax = imax[brow];  /* maximum space allocated for this row */
806     nrow = ailen[brow]; /* actual length of this row */
807     low  = 0;
808 
809     for (l=0; l<n; l++) { /* loop over added columns */
810       if (in[l] < 0) continue;
811 #if defined(PETSC_USE_DEBUG)
812       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
813 #endif
814       col = in[l];
815       bcol = col/bs;              /* block col number */
816 
817       if (brow > bcol) {
818         if (a->ignore_ltriangular){
819           continue; /* ignore lower triangular values */
820         } else {
821           SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
822         }
823       }
824 
825       ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
826       if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
827         /* element value a(k,l) */
828         if (roworiented) {
829           value = v[l + k*n];
830         } else {
831           value = v[k + l*m];
832         }
833 
834         /* move pointer bap to a(k,l) quickly and add/insert value */
835         if (col <= lastcol) low = 0; high = nrow;
836         lastcol = col;
837         while (high-low > 7) {
838           t = (low+high)/2;
839           if (rp[t] > bcol) high = t;
840           else              low  = t;
841         }
842         for (i=low; i<high; i++) {
843           if (rp[i] > bcol) break;
844           if (rp[i] == bcol) {
845             bap  = ap +  bs2*i + bs*cidx + ridx;
846             if (is == ADD_VALUES) *bap += value;
847             else                  *bap  = value;
848             /* for diag block, add/insert its symmetric element a(cidx,ridx) */
849             if (brow == bcol && ridx < cidx){
850               bap  = ap +  bs2*i + bs*ridx + cidx;
851               if (is == ADD_VALUES) *bap += value;
852               else                  *bap  = value;
853             }
854             goto noinsert1;
855           }
856         }
857 
858         if (nonew == 1) goto noinsert1;
859         if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
860         MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
861 
862         N = nrow++ - 1; high++;
863         /* shift up all the later entries in this row */
864         for (ii=N; ii>=i; ii--) {
865           rp[ii+1] = rp[ii];
866           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
867         }
868         if (N>=i) {
869           ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
870         }
871         rp[i]                      = bcol;
872         ap[bs2*i + bs*cidx + ridx] = value;
873       noinsert1:;
874         low = i;
875       }
876     }   /* end of loop over added columns */
877     ailen[brow] = nrow;
878   }   /* end of loop over added rows */
879   PetscFunctionReturn(0);
880 }
881 
882 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
883 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
884 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
885 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
886 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
887 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar);
888 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
889 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
890 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
891 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
892 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
893 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
894 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
895 
896 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
897 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
898 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
899 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
900 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
901 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
902 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
903 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
904 
905 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
906 
907 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
908 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
909 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
910 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
911 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
912 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
913 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
914 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
915 
916 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*);
917 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*);
918 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*);
919 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*);
920 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*);
921 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*);
922 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*);
923 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*);
924 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
925 
926 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
927 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
928 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
929 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
930 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
931 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
932 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
933 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
934 
935 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
936 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
937 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
938 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
939 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
940 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
941 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
942 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
943 
944 #ifdef HAVE_MatICCFactor
945 /* modified from MatILUFactor_SeqSBAIJ, needs further work!  */
946 #undef __FUNCT__
947 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
948 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
949 {
950   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
951   Mat            outA;
952   PetscErrorCode ierr;
953   PetscTruth     row_identity,col_identity;
954 
955   PetscFunctionBegin;
956   outA          = inA;
957   inA->factor   = FACTOR_CHOLESKY;
958 
959   if (!a->diag) {
960     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
961   }
962   /*
963     Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
964     for ILU(0) factorization with natural ordering
965   */
966   switch (a->bs) {
967   case 1:
968     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
969     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
970     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
971     ierr = PetscInfo((inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr);
972   case 2:
973     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
974     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
975     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
976     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr);
977     break;
978   case 3:
979      inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
980      inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
981      inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
982      ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr);
983      break;
984   case 4:
985     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
986     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
987     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
988     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr);
989     break;
990   case 5:
991     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
992     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
993     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
994     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr);
995     break;
996   case 6:
997     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
998     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
999     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1000     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr);
1001     break;
1002   case 7:
1003     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1004     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1005     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1006     ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr);
1007     break;
1008   default:
1009     a->row        = row;
1010     a->icol       = col;
1011     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1012     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1013 
1014     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1015     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1016     ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1017 
1018     if (!a->solve_work) {
1019       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1020       ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1021     }
1022   }
1023 
1024   ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 #endif
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1031 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1032 {
1033   static PetscTruth called = PETSC_FALSE;
1034   MPI_Comm          comm = A->comm;
1035   PetscErrorCode    ierr;
1036 
1037   PetscFunctionBegin;
1038   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1039   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1040   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1041   ierr = (*PetscHelpPrintf)(comm,"  -mat_ignore_lower_triangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr);
1042   ierr = (*PetscHelpPrintf)(comm,"  -mat_getrow_uppertriangular: Enable MatGetRow() for retrieving the upper triangular part of the row\n");CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 EXTERN_C_BEGIN
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1049 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1050 {
1051   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1052   PetscInt     i,nz,n;
1053 
1054   PetscFunctionBegin;
1055   nz = baij->maxnz;
1056   n  = mat->n;
1057   for (i=0; i<nz; i++) {
1058     baij->j[i] = indices[i];
1059   }
1060    baij->nz = nz;
1061    for (i=0; i<n; i++) {
1062      baij->ilen[i] = baij->imax[i];
1063    }
1064    PetscFunctionReturn(0);
1065 }
1066 EXTERN_C_END
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1070 /*@
1071   MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1072   in the matrix.
1073 
1074   Input Parameters:
1075   +  mat     - the SeqSBAIJ matrix
1076   -  indices - the column indices
1077 
1078   Level: advanced
1079 
1080   Notes:
1081   This can be called if you have precomputed the nonzero structure of the
1082   matrix and want to provide it to the matrix object to improve the performance
1083   of the MatSetValues() operation.
1084 
1085   You MUST have set the correct numbers of nonzeros per row in the call to
1086   MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
1087 
1088   MUST be called before any calls to MatSetValues()
1089 
1090   .seealso: MatCreateSeqSBAIJ
1091 @*/
1092 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1093 {
1094   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1095 
1096   PetscFunctionBegin;
1097   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1098   PetscValidPointer(indices,2);
1099   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1100   if (f) {
1101     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1102   } else {
1103     SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatCopy_SeqSBAIJ"
1110 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1111 {
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   /* If the two matrices have the same copy implementation, use fast copy. */
1116   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1117     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1118     Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1119 
1120     if (a->i[A->m] != b->i[B->m]) {
1121       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1122     }
1123     ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr);
1124   } else {
1125     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1126     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1127     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1128   }
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1134 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1135 {
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr =  MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1145 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1146 {
1147   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1148   PetscFunctionBegin;
1149   *array = a->a;
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1155 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1156 {
1157   PetscFunctionBegin;
1158   PetscFunctionReturn(0);
1159  }
1160 
1161 #include "petscblaslapack.h"
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1164 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1165 {
1166   Mat_SeqSBAIJ   *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1167   PetscErrorCode ierr;
1168   PetscInt       i,bs=Y->bs,bs2,j;
1169   PetscBLASInt   bnz = (PetscBLASInt)x->nz,one = 1;
1170 
1171   PetscFunctionBegin;
1172   if (str == SAME_NONZERO_PATTERN) {
1173     PetscScalar alpha = a;
1174     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1175   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1176     if (y->xtoy && y->XtoY != X) {
1177       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1178       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1179     }
1180     if (!y->xtoy) { /* get xtoy */
1181       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1182       y->XtoY = X;
1183     }
1184     bs2 = bs*bs;
1185     for (i=0; i<x->nz; i++) {
1186       j = 0;
1187       while (j < bs2){
1188         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1189         j++;
1190       }
1191     }
1192     ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1193   } else {
1194     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1195     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1196     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1203 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1204 {
1205   PetscFunctionBegin;
1206   *flg = PETSC_TRUE;
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1212 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1213 {
1214    PetscFunctionBegin;
1215    *flg = PETSC_TRUE;
1216    PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1221 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1222  {
1223    PetscFunctionBegin;
1224    *flg = PETSC_FALSE;
1225    PetscFunctionReturn(0);
1226  }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatRealPart_SeqSBAIJ"
1230 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1231 {
1232   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1233   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1234   PetscScalar    *aa = a->a;
1235 
1236   PetscFunctionBegin;
1237   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ"
1243 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1244 {
1245   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1246   PetscInt       i,nz = a->bs2*a->i[a->mbs];
1247   PetscScalar    *aa = a->a;
1248 
1249   PetscFunctionBegin;
1250   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 /* -------------------------------------------------------------------*/
1255 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1256        MatGetRow_SeqSBAIJ,
1257        MatRestoreRow_SeqSBAIJ,
1258        MatMult_SeqSBAIJ_N,
1259 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1260        MatMult_SeqSBAIJ_N,       /* transpose versions are same as non-transpose versions */
1261        MatMultAdd_SeqSBAIJ_N,
1262        MatSolve_SeqSBAIJ_N,
1263        0,
1264        0,
1265 /*10*/ 0,
1266        0,
1267        MatCholeskyFactor_SeqSBAIJ,
1268        MatRelax_SeqSBAIJ,
1269        MatTranspose_SeqSBAIJ,
1270 /*15*/ MatGetInfo_SeqSBAIJ,
1271        MatEqual_SeqSBAIJ,
1272        MatGetDiagonal_SeqSBAIJ,
1273        MatDiagonalScale_SeqSBAIJ,
1274        MatNorm_SeqSBAIJ,
1275 /*20*/ 0,
1276        MatAssemblyEnd_SeqSBAIJ,
1277        0,
1278        MatSetOption_SeqSBAIJ,
1279        MatZeroEntries_SeqSBAIJ,
1280 /*25*/ 0,
1281        0,
1282        0,
1283        MatCholeskyFactorSymbolic_SeqSBAIJ,
1284        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1285 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1286        0,
1287        MatICCFactorSymbolic_SeqSBAIJ,
1288        MatGetArray_SeqSBAIJ,
1289        MatRestoreArray_SeqSBAIJ,
1290 /*35*/ MatDuplicate_SeqSBAIJ,
1291        0,
1292        0,
1293        0,
1294        0,
1295 /*40*/ MatAXPY_SeqSBAIJ,
1296        MatGetSubMatrices_SeqSBAIJ,
1297        MatIncreaseOverlap_SeqSBAIJ,
1298        MatGetValues_SeqSBAIJ,
1299        MatCopy_SeqSBAIJ,
1300 /*45*/ MatPrintHelp_SeqSBAIJ,
1301        MatScale_SeqSBAIJ,
1302        0,
1303        0,
1304        0,
1305 /*50*/ 0,
1306        MatGetRowIJ_SeqSBAIJ,
1307        MatRestoreRowIJ_SeqSBAIJ,
1308        0,
1309        0,
1310 /*55*/ 0,
1311        0,
1312        0,
1313        0,
1314        MatSetValuesBlocked_SeqSBAIJ,
1315 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1316        0,
1317        0,
1318        MatGetPetscMaps_Petsc,
1319        0,
1320 /*65*/ 0,
1321        0,
1322        0,
1323        0,
1324        0,
1325 /*70*/ MatGetRowMax_SeqSBAIJ,
1326        0,
1327        0,
1328        0,
1329        0,
1330 /*75*/ 0,
1331        0,
1332        0,
1333        0,
1334        0,
1335 /*80*/ 0,
1336        0,
1337        0,
1338 #if !defined(PETSC_USE_COMPLEX)
1339        MatGetInertia_SeqSBAIJ,
1340 #else
1341        0,
1342 #endif
1343        MatLoad_SeqSBAIJ,
1344 /*85*/ MatIsSymmetric_SeqSBAIJ,
1345        MatIsHermitian_SeqSBAIJ,
1346        MatIsStructurallySymmetric_SeqSBAIJ,
1347        0,
1348        0,
1349 /*90*/ 0,
1350        0,
1351        0,
1352        0,
1353        0,
1354 /*95*/ 0,
1355        0,
1356        0,
1357        0,
1358        0,
1359 /*100*/0,
1360        0,
1361        0,
1362        0,
1363        0,
1364 /*105*/0,
1365        MatRealPart_SeqSBAIJ,
1366        MatImaginaryPart_SeqSBAIJ,
1367        MatGetRowUpperTriangular_SeqSBAIJ,
1368        MatRestoreRowUpperTriangular_SeqSBAIJ
1369 };
1370 
1371 EXTERN_C_BEGIN
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1374 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat)
1375 {
1376   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1377   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   if (aij->nonew != 1) {
1382     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1383   }
1384 
1385   /* allocate space for values if not already there */
1386   if (!aij->saved_values) {
1387     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1388   }
1389 
1390   /* copy values over */
1391   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 EXTERN_C_END
1395 
1396 EXTERN_C_BEGIN
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1399 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat)
1400 {
1401   Mat_SeqSBAIJ   *aij = (Mat_SeqSBAIJ *)mat->data;
1402   PetscErrorCode ierr;
1403   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
1404 
1405   PetscFunctionBegin;
1406   if (aij->nonew != 1) {
1407     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1408   }
1409   if (!aij->saved_values) {
1410     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1411   }
1412 
1413   /* copy values over */
1414   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 EXTERN_C_END
1418 
1419 EXTERN_C_BEGIN
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1422 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1423 {
1424   Mat_SeqSBAIJ   *b = (Mat_SeqSBAIJ*)B->data;
1425   PetscErrorCode ierr;
1426   PetscInt       i,mbs,bs2;
1427   PetscTruth     skipallocation = PETSC_FALSE,flg;
1428 
1429   PetscFunctionBegin;
1430   B->preallocated = PETSC_TRUE;
1431   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1432   mbs  = B->m/bs;
1433   bs2  = bs*bs;
1434 
1435   if (mbs*bs != B->m) {
1436     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1437   }
1438 
1439   if (nz == MAT_SKIP_ALLOCATION) {
1440     skipallocation = PETSC_TRUE;
1441     nz             = 0;
1442   }
1443 
1444   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1445   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1446   if (nnz) {
1447     for (i=0; i<mbs; i++) {
1448       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1449       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1450     }
1451   }
1452 
1453   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1454   if (!flg) {
1455     switch (bs) {
1456     case 1:
1457       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1458       B->ops->solve            = MatSolve_SeqSBAIJ_1;
1459       B->ops->solves           = MatSolves_SeqSBAIJ_1;
1460       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_1;
1461       B->ops->mult             = MatMult_SeqSBAIJ_1;
1462       B->ops->multadd          = MatMultAdd_SeqSBAIJ_1;
1463       B->ops->multtranspose    = MatMult_SeqSBAIJ_1;
1464       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1465       break;
1466     case 2:
1467       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1468       B->ops->solve            = MatSolve_SeqSBAIJ_2;
1469       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_2;
1470       B->ops->mult             = MatMult_SeqSBAIJ_2;
1471       B->ops->multadd          = MatMultAdd_SeqSBAIJ_2;
1472       B->ops->multtranspose    = MatMult_SeqSBAIJ_2;
1473       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1474       break;
1475     case 3:
1476       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1477       B->ops->solve            = MatSolve_SeqSBAIJ_3;
1478       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_3;
1479       B->ops->mult             = MatMult_SeqSBAIJ_3;
1480       B->ops->multadd          = MatMultAdd_SeqSBAIJ_3;
1481       B->ops->multtranspose    = MatMult_SeqSBAIJ_3;
1482       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1483       break;
1484     case 4:
1485       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1486       B->ops->solve            = MatSolve_SeqSBAIJ_4;
1487       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_4;
1488       B->ops->mult             = MatMult_SeqSBAIJ_4;
1489       B->ops->multadd          = MatMultAdd_SeqSBAIJ_4;
1490       B->ops->multtranspose    = MatMult_SeqSBAIJ_4;
1491       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1492       break;
1493     case 5:
1494       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1495       B->ops->solve            = MatSolve_SeqSBAIJ_5;
1496       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_5;
1497       B->ops->mult             = MatMult_SeqSBAIJ_5;
1498       B->ops->multadd          = MatMultAdd_SeqSBAIJ_5;
1499       B->ops->multtranspose    = MatMult_SeqSBAIJ_5;
1500       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1501       break;
1502     case 6:
1503       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1504       B->ops->solve            = MatSolve_SeqSBAIJ_6;
1505       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_6;
1506       B->ops->mult             = MatMult_SeqSBAIJ_6;
1507       B->ops->multadd          = MatMultAdd_SeqSBAIJ_6;
1508       B->ops->multtranspose    = MatMult_SeqSBAIJ_6;
1509       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1510       break;
1511     case 7:
1512       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1513       B->ops->solve            = MatSolve_SeqSBAIJ_7;
1514       B->ops->solvetranspose   = MatSolve_SeqSBAIJ_7;
1515       B->ops->mult             = MatMult_SeqSBAIJ_7;
1516       B->ops->multadd          = MatMultAdd_SeqSBAIJ_7;
1517       B->ops->multtranspose    = MatMult_SeqSBAIJ_7;
1518       B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1519       break;
1520     }
1521   }
1522 
1523   b->mbs = mbs;
1524   b->nbs = mbs;
1525   if (!skipallocation) {
1526     /* b->ilen will count nonzeros in each block row so far. */
1527     ierr   = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
1528     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1529     if (!nnz) {
1530       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1531       else if (nz <= 0)        nz = 1;
1532       for (i=0; i<mbs; i++) {
1533         b->imax[i] = nz;
1534       }
1535       nz = nz*mbs; /* total nz */
1536     } else {
1537       nz = 0;
1538       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1539     }
1540     /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1541 
1542     /* allocate the matrix space */
1543     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
1544     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1545     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1546     b->singlemalloc = PETSC_TRUE;
1547 
1548     /* pointer to beginning of each row */
1549     b->i[0] = 0;
1550     for (i=1; i<mbs+1; i++) {
1551       b->i[i] = b->i[i-1] + b->imax[i-1];
1552     }
1553   }
1554 
1555   B->bs               = bs;
1556   b->bs2              = bs2;
1557   b->nz             = 0;
1558   b->maxnz          = nz*bs2;
1559 
1560   b->inew             = 0;
1561   b->jnew             = 0;
1562   b->anew             = 0;
1563   b->a2anew           = 0;
1564   b->permute          = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 EXTERN_C_END
1568 
1569 EXTERN_C_BEGIN
1570 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1572 EXTERN_C_END
1573 
1574 /*MC
1575   MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1576   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1577 
1578   Options Database Keys:
1579   . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1580 
1581   Level: beginner
1582 
1583   .seealso: MatCreateSeqSBAIJ
1584 M*/
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1589 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B)
1590 {
1591   Mat_SeqSBAIJ   *b;
1592   PetscErrorCode ierr;
1593   PetscMPIInt    size;
1594   PetscTruth     flg;
1595 
1596   PetscFunctionBegin;
1597   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1598   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1599   B->m = B->M = PetscMax(B->m,B->M);
1600   B->n = B->N = PetscMax(B->n,B->N);
1601 
1602   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1603   B->data = (void*)b;
1604   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1605   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1606   B->ops->view        = MatView_SeqSBAIJ;
1607   B->factor           = 0;
1608   B->mapping          = 0;
1609   b->row              = 0;
1610   b->icol             = 0;
1611   b->reallocs         = 0;
1612   b->saved_values     = 0;
1613 
1614   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1615   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1616 
1617   b->sorted           = PETSC_FALSE;
1618   b->roworiented      = PETSC_TRUE;
1619   b->nonew            = 0;
1620   b->diag             = 0;
1621   b->solve_work       = 0;
1622   b->mult_work        = 0;
1623   B->spptr            = 0;
1624   b->keepzeroedrows   = PETSC_FALSE;
1625   b->xtoy             = 0;
1626   b->XtoY             = 0;
1627 
1628   b->inew             = 0;
1629   b->jnew             = 0;
1630   b->anew             = 0;
1631   b->a2anew           = 0;
1632   b->permute          = PETSC_FALSE;
1633 
1634   b->ignore_ltriangular = PETSC_FALSE;
1635   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr);
1636   if (flg) b->ignore_ltriangular = PETSC_TRUE;
1637 
1638   b->getrow_utriangular = PETSC_FALSE;
1639   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr);
1640   if (flg) b->getrow_utriangular = PETSC_TRUE;
1641 
1642   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1643                                      "MatStoreValues_SeqSBAIJ",
1644                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1645   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1646                                      "MatRetrieveValues_SeqSBAIJ",
1647                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1649                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1650                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1652                                      "MatConvert_SeqSBAIJ_SeqAIJ",
1653                                       MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr);
1654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1655                                      "MatConvert_SeqSBAIJ_SeqBAIJ",
1656                                       MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr);
1657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1658                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1659                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1660 
1661   B->symmetric                  = PETSC_TRUE;
1662   B->structurally_symmetric     = PETSC_TRUE;
1663   B->symmetric_set              = PETSC_TRUE;
1664   B->structurally_symmetric_set = PETSC_TRUE;
1665   PetscFunctionReturn(0);
1666 }
1667 EXTERN_C_END
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1671 /*@C
1672    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1673    compressed row) format.  For good matrix assembly performance the
1674    user should preallocate the matrix storage by setting the parameter nz
1675    (or the array nnz).  By setting these parameters accurately, performance
1676    during matrix assembly can be increased by more than a factor of 50.
1677 
1678    Collective on Mat
1679 
1680    Input Parameters:
1681 +  A - the symmetric matrix
1682 .  bs - size of block
1683 .  nz - number of block nonzeros per block row (same for all rows)
1684 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1685          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1686 
1687    Options Database Keys:
1688 .   -mat_no_unroll - uses code that does not unroll the loops in the
1689                      block calculations (much slower)
1690 .    -mat_block_size - size of the blocks to use
1691 
1692    Level: intermediate
1693 
1694    Notes:
1695    Specify the preallocated storage with either nz or nnz (not both).
1696    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1697    allocation.  For additional details, see the users manual chapter on
1698    matrices.
1699 
1700    If the nnz parameter is given then the nz parameter is ignored
1701 
1702 
1703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1704 @*/
1705 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1706 {
1707   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1711   if (f) {
1712     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1713   }
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatCreateSeqSBAIJ"
1719 /*@C
1720    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1721    compressed row) format.  For good matrix assembly performance the
1722    user should preallocate the matrix storage by setting the parameter nz
1723    (or the array nnz).  By setting these parameters accurately, performance
1724    during matrix assembly can be increased by more than a factor of 50.
1725 
1726    Collective on MPI_Comm
1727 
1728    Input Parameters:
1729 +  comm - MPI communicator, set to PETSC_COMM_SELF
1730 .  bs - size of block
1731 .  m - number of rows, or number of columns
1732 .  nz - number of block nonzeros per block row (same for all rows)
1733 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1734          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1735 
1736    Output Parameter:
1737 .  A - the symmetric matrix
1738 
1739    Options Database Keys:
1740 .   -mat_no_unroll - uses code that does not unroll the loops in the
1741                      block calculations (much slower)
1742 .    -mat_block_size - size of the blocks to use
1743 
1744    Level: intermediate
1745 
1746    Notes:
1747 
1748    Specify the preallocated storage with either nz or nnz (not both).
1749    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1750    allocation.  For additional details, see the users manual chapter on
1751    matrices.
1752 
1753    If the nnz parameter is given then the nz parameter is ignored
1754 
1755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1756 @*/
1757 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1758 {
1759   PetscErrorCode ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1763   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1764   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1765   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1771 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1772 {
1773   Mat            C;
1774   Mat_SeqSBAIJ   *c,*a = (Mat_SeqSBAIJ*)A->data;
1775   PetscErrorCode ierr;
1776   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1777 
1778   PetscFunctionBegin;
1779   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1780 
1781   *B = 0;
1782   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1783   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
1784   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1785   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1786   c    = (Mat_SeqSBAIJ*)C->data;
1787 
1788   C->preallocated   = PETSC_TRUE;
1789   C->factor         = A->factor;
1790   c->row            = 0;
1791   c->icol           = 0;
1792   c->saved_values   = 0;
1793   c->keepzeroedrows = a->keepzeroedrows;
1794   C->assembled      = PETSC_TRUE;
1795 
1796   C->M    = A->M;
1797   C->N    = A->N;
1798   C->bs   = A->bs;
1799   c->bs2  = a->bs2;
1800   c->mbs  = a->mbs;
1801   c->nbs  = a->nbs;
1802 
1803   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr);
1804   ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr);
1805   for (i=0; i<mbs; i++) {
1806     c->imax[i] = a->imax[i];
1807     c->ilen[i] = a->ilen[i];
1808   }
1809   ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1810 
1811   /* allocate the matrix space */
1812   ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
1813   c->singlemalloc = PETSC_TRUE;
1814   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1815   ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr);
1816   if (mbs > 0) {
1817     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
1818     if (cpvalues == MAT_COPY_VALUES) {
1819       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1820     } else {
1821       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1822     }
1823   }
1824 
1825   c->sorted      = a->sorted;
1826   c->roworiented = a->roworiented;
1827   c->nonew       = a->nonew;
1828 
1829   if (a->diag) {
1830     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
1831     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1832     for (i=0; i<mbs; i++) {
1833       c->diag[i] = a->diag[i];
1834     }
1835   } else c->diag        = 0;
1836   c->nz               = a->nz;
1837   c->maxnz            = a->maxnz;
1838   c->solve_work         = 0;
1839   c->mult_work          = 0;
1840   *B = C;
1841   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1847 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A)
1848 {
1849   Mat_SeqSBAIJ   *a;
1850   Mat            B;
1851   PetscErrorCode ierr;
1852   int            fd;
1853   PetscMPIInt    size;
1854   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
1855   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1856   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
1857   PetscInt       *masked,nmask,tmp,bs2,ishift;
1858   PetscScalar    *aa;
1859   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1860 
1861   PetscFunctionBegin;
1862   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1863   bs2  = bs*bs;
1864 
1865   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1866   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1867   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1868   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1869   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1870   M = header[1]; N = header[2]; nz = header[3];
1871 
1872   if (header[3] < 0) {
1873     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1874   }
1875 
1876   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1877 
1878   /*
1879      This code adds extra rows to make sure the number of rows is
1880     divisible by the blocksize
1881   */
1882   mbs        = M/bs;
1883   extra_rows = bs - M + bs*(mbs);
1884   if (extra_rows == bs) extra_rows = 0;
1885   else                  mbs++;
1886   if (extra_rows) {
1887     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
1888   }
1889 
1890   /* read in row lengths */
1891   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1892   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1893   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1894 
1895   /* read in column indices */
1896   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1897   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1898   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1899 
1900   /* loop over row lengths determining block row lengths */
1901   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr);
1902   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1903   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
1904   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
1905   masked   = mask + mbs;
1906   rowcount = 0; nzcount = 0;
1907   for (i=0; i<mbs; i++) {
1908     nmask = 0;
1909     for (j=0; j<bs; j++) {
1910       kmax = rowlengths[rowcount];
1911       for (k=0; k<kmax; k++) {
1912         tmp = jj[nzcount++]/bs;   /* block col. index */
1913         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1914       }
1915       rowcount++;
1916     }
1917     s_browlengths[i] += nmask;
1918 
1919     /* zero out the mask elements we set */
1920     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1921   }
1922 
1923   /* create our matrix */
1924   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
1925   ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
1926   ierr = MatSetType(B,type);CHKERRQ(ierr);
1927   ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr);
1928   a = (Mat_SeqSBAIJ*)B->data;
1929 
1930   /* set matrix "i" values */
1931   a->i[0] = 0;
1932   for (i=1; i<= mbs; i++) {
1933     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1934     a->ilen[i-1] = s_browlengths[i-1];
1935   }
1936   a->nz = a->i[mbs];
1937 
1938   /* read in nonzero values */
1939   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1940   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1941   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1942 
1943   /* set "a" and "j" values into matrix */
1944   nzcount = 0; jcount = 0;
1945   for (i=0; i<mbs; i++) {
1946     nzcountb = nzcount;
1947     nmask    = 0;
1948     for (j=0; j<bs; j++) {
1949       kmax = rowlengths[i*bs+j];
1950       for (k=0; k<kmax; k++) {
1951         tmp = jj[nzcount++]/bs; /* block col. index */
1952         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1953       }
1954     }
1955     /* sort the masked values */
1956     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1957 
1958     /* set "j" values into matrix */
1959     maskcount = 1;
1960     for (j=0; j<nmask; j++) {
1961       a->j[jcount++]  = masked[j];
1962       mask[masked[j]] = maskcount++;
1963     }
1964 
1965     /* set "a" values into matrix */
1966     ishift = bs2*a->i[i];
1967     for (j=0; j<bs; j++) {
1968       kmax = rowlengths[i*bs+j];
1969       for (k=0; k<kmax; k++) {
1970         tmp       = jj[nzcountb]/bs ; /* block col. index */
1971         if (tmp >= i){
1972           block     = mask[tmp] - 1;
1973           point     = jj[nzcountb] - bs*tmp;
1974           idx       = ishift + bs2*block + j + bs*point;
1975           a->a[idx] = aa[nzcountb];
1976         }
1977         nzcountb++;
1978       }
1979     }
1980     /* zero out the mask elements we set */
1981     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1982   }
1983   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1984 
1985   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1986   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1987   ierr = PetscFree(aa);CHKERRQ(ierr);
1988   ierr = PetscFree(jj);CHKERRQ(ierr);
1989   ierr = PetscFree(mask);CHKERRQ(ierr);
1990 
1991   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatView_Private(B);CHKERRQ(ierr);
1994   *A = B;
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2000 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2001 {
2002   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
2003   MatScalar      *aa=a->a,*v,*v1;
2004   PetscScalar    *x,*b,*t,sum,d;
2005   PetscErrorCode ierr;
2006   PetscInt       m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
2007   PetscInt       nz,nz1,*vj,*vj1,i;
2008 
2009   PetscFunctionBegin;
2010   its = its*lits;
2011   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2012 
2013   if (bs > 1)
2014     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2015 
2016   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2017   if (xx != bb) {
2018     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2019   } else {
2020     b = x;
2021   }
2022 
2023   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2024 
2025   if (flag & SOR_ZERO_INITIAL_GUESS) {
2026     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2027       for (i=0; i<m; i++)
2028         t[i] = b[i];
2029 
2030       for (i=0; i<m; i++){
2031         d  = *(aa + ai[i]);  /* diag[i] */
2032         v  = aa + ai[i] + 1;
2033         vj = aj + ai[i] + 1;
2034         nz = ai[i+1] - ai[i] - 1;
2035         x[i] = omega*t[i]/d;
2036         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2037         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2038       }
2039     }
2040 
2041     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2042       for (i=0; i<m; i++)
2043         t[i] = b[i];
2044 
2045       for (i=0; i<m-1; i++){  /* update rhs */
2046         v  = aa + ai[i] + 1;
2047         vj = aj + ai[i] + 1;
2048         nz = ai[i+1] - ai[i] - 1;
2049         while (nz--) t[*vj++] -= x[i]*(*v++);
2050         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2051       }
2052       for (i=m-1; i>=0; i--){
2053         d  = *(aa + ai[i]);
2054         v  = aa + ai[i] + 1;
2055         vj = aj + ai[i] + 1;
2056         nz = ai[i+1] - ai[i] - 1;
2057         sum = t[i];
2058         while (nz--) sum -= x[*vj++]*(*v++);
2059         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2060         x[i] =   (1-omega)*x[i] + omega*sum/d;
2061       }
2062     }
2063     its--;
2064   }
2065 
2066   while (its--) {
2067     /*
2068        forward sweep:
2069        for i=0,...,m-1:
2070          sum[i] = (b[i] - U(i,:)x )/d[i];
2071          x[i]   = (1-omega)x[i] + omega*sum[i];
2072          b      = b - x[i]*U^T(i,:);
2073 
2074     */
2075     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2076       for (i=0; i<m; i++)
2077         t[i] = b[i];
2078 
2079       for (i=0; i<m; i++){
2080         d  = *(aa + ai[i]);  /* diag[i] */
2081         v  = aa + ai[i] + 1; v1=v;
2082         vj = aj + ai[i] + 1; vj1=vj;
2083         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2084         sum = t[i];
2085         while (nz1--) sum -= (*v1++)*x[*vj1++];
2086         x[i] = (1-omega)*x[i] + omega*sum/d;
2087         while (nz--) t[*vj++] -= x[i]*(*v++);
2088         ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr);
2089       }
2090     }
2091 
2092   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2093       /*
2094        backward sweep:
2095        b = b - x[i]*U^T(i,:), i=0,...,n-2
2096        for i=m-1,...,0:
2097          sum[i] = (b[i] - U(i,:)x )/d[i];
2098          x[i]   = (1-omega)x[i] + omega*sum[i];
2099       */
2100       for (i=0; i<m; i++)
2101         t[i] = b[i];
2102 
2103       for (i=0; i<m-1; i++){  /* update rhs */
2104         v  = aa + ai[i] + 1;
2105         vj = aj + ai[i] + 1;
2106         nz = ai[i+1] - ai[i] - 1;
2107         while (nz--) t[*vj++] -= x[i]*(*v++);
2108         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2109       }
2110       for (i=m-1; i>=0; i--){
2111         d  = *(aa + ai[i]);
2112         v  = aa + ai[i] + 1;
2113         vj = aj + ai[i] + 1;
2114         nz = ai[i+1] - ai[i] - 1;
2115         sum = t[i];
2116         while (nz--) sum -= x[*vj++]*(*v++);
2117         ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr);
2118         x[i] =   (1-omega)*x[i] + omega*sum/d;
2119       }
2120     }
2121   }
2122 
2123   ierr = PetscFree(t);CHKERRQ(ierr);
2124   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2125   if (bb != xx) {
2126     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 
2132 
2133 
2134 
2135 
2136