xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 5ca2875643c1dba871b18f4695cda45ca2bfcf55)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
294 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
297   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
298   PetscScalar  *aa;
299   FILE         *file;
300 
301   PetscFunctionBegin;
302   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
303   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
304   col_lens[0] = MAT_FILE_COOKIE;
305 
306   col_lens[1] = A->m;
307   col_lens[2] = A->m;
308   col_lens[3] = a->s_nz*bs2;
309 
310   /* store lengths of each row and write (including header) to file */
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
315     }
316   }
317   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
318   ierr = PetscFree(col_lens);CHKERRQ(ierr);
319 
320   /* store column indices (zero start index) */
321   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
322   count = 0;
323   for (i=0; i<a->mbs; i++) {
324     for (j=0; j<bs; j++) {
325       for (k=a->i[i]; k<a->i[i+1]; k++) {
326         for (l=0; l<bs; l++) {
327           jj[count++] = bs*a->j[k] + l;
328         }
329       }
330     }
331   }
332   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
333   ierr = PetscFree(jj);CHKERRQ(ierr);
334 
335   /* store nonzero values */
336   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
337   count = 0;
338   for (i=0; i<a->mbs; i++) {
339     for (j=0; j<bs; j++) {
340       for (k=a->i[i]; k<a->i[i+1]; k++) {
341         for (l=0; l<bs; l++) {
342           aa[count++] = a->a[bs2*k + l*bs + j];
343         }
344       }
345     }
346   }
347   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
348   ierr = PetscFree(aa);CHKERRQ(ierr);
349 
350   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
351   if (file) {
352     fprintf(file,"-matload_block_size %d\n",a->bs);
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
359 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
360 {
361   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
362   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
363   char              *name;
364   PetscViewerFormat format;
365 
366   PetscFunctionBegin;
367   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
368   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
369   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
370     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
371   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
372     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
373   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
374     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
375     for (i=0; i<a->mbs; i++) {
376       for (j=0; j<bs; j++) {
377         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
378         for (k=a->i[i]; k<a->i[i+1]; k++) {
379           for (l=0; l<bs; l++) {
380 #if defined(PETSC_USE_COMPLEX)
381             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
383                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
384             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
386                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
387             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
388               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             }
390 #else
391             if (a->a[bs2*k + l*bs + j] != 0.0) {
392               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
393             }
394 #endif
395           }
396         }
397         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
398       }
399     }
400     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
401   } else {
402     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
403     for (i=0; i<a->mbs; i++) {
404       for (j=0; j<bs; j++) {
405         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
406         for (k=a->i[i]; k<a->i[i+1]; k++) {
407           for (l=0; l<bs; l++) {
408 #if defined(PETSC_USE_COMPLEX)
409             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
411                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
412             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
413               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
414                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
415             } else {
416               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
417             }
418 #else
419             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
420 #endif
421           }
422         }
423         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
424       }
425     }
426     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
427   }
428   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
434 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
435 {
436   Mat          A = (Mat) Aa;
437   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
438   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
439   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
440   MatScalar    *aa;
441   MPI_Comm     comm;
442   PetscViewer  viewer;
443 
444   PetscFunctionBegin;
445   /*
446       This is nasty. If this is called from an originally parallel matrix
447    then all processes call this,but only the first has the matrix so the
448    rest should return immediately.
449   */
450   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
452   if (rank) PetscFunctionReturn(0);
453 
454   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
455 
456   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
457   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
458 
459   /* loop over matrix elements drawing boxes */
460   color = PETSC_DRAW_BLUE;
461   for (i=0,row=0; i<mbs; i++,row+=bs) {
462     for (j=a->i[i]; j<a->i[i+1]; j++) {
463       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
464       x_l = a->j[j]*bs; x_r = x_l + 1.0;
465       aa = a->a + j*bs2;
466       for (k=0; k<bs; k++) {
467         for (l=0; l<bs; l++) {
468           if (PetscRealPart(*aa++) >=  0.) continue;
469           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
470         }
471       }
472     }
473   }
474   color = PETSC_DRAW_CYAN;
475   for (i=0,row=0; i<mbs; i++,row+=bs) {
476     for (j=a->i[i]; j<a->i[i+1]; j++) {
477       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
478       x_l = a->j[j]*bs; x_r = x_l + 1.0;
479       aa = a->a + j*bs2;
480       for (k=0; k<bs; k++) {
481         for (l=0; l<bs; l++) {
482           if (PetscRealPart(*aa++) != 0.) continue;
483           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488 
489   color = PETSC_DRAW_RED;
490   for (i=0,row=0; i<mbs; i++,row+=bs) {
491     for (j=a->i[i]; j<a->i[i+1]; j++) {
492       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
493       x_l = a->j[j]*bs; x_r = x_l + 1.0;
494       aa = a->a + j*bs2;
495       for (k=0; k<bs; k++) {
496         for (l=0; l<bs; l++) {
497           if (PetscRealPart(*aa++) <= 0.) continue;
498           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
499         }
500       }
501     }
502   }
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
508 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
509 {
510   int          ierr;
511   PetscReal    xl,yl,xr,yr,w,h;
512   PetscDraw    draw;
513   PetscTruth   isnull;
514 
515   PetscFunctionBegin;
516 
517   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
518   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
519 
520   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
521   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
522   xr += w;    yr += h;  xl = -w;     yl = -h;
523   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
524   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
525   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatView_SeqSBAIJ"
531 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
532 {
533   int        ierr;
534   PetscTruth issocket,isascii,isbinary,isdraw;
535 
536   PetscFunctionBegin;
537   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
538   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
539   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
540   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
541   if (issocket) {
542     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
543   } else if (isascii){
544     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
545   } else if (isbinary) {
546     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
547   } else if (isdraw) {
548     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
549   } else {
550     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
558 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
559 {
560   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
561   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
562   int        *ai = a->i,*ailen = a->ilen;
563   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
564   MatScalar  *ap,*aa = a->a,zero = 0.0;
565 
566   PetscFunctionBegin;
567   for (k=0; k<m; k++) { /* loop over rows */
568     row  = im[k]; brow = row/bs;
569     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
570     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
571     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
572     nrow = ailen[brow];
573     for (l=0; l<n; l++) { /* loop over columns */
574       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
575       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
576       col  = in[l] ;
577       bcol = col/bs;
578       cidx = col%bs;
579       ridx = row%bs;
580       high = nrow;
581       low  = 0; /* assume unsorted */
582       while (high-low > 5) {
583         t = (low+high)/2;
584         if (rp[t] > bcol) high = t;
585         else             low  = t;
586       }
587       for (i=low; i<high; i++) {
588         if (rp[i] > bcol) break;
589         if (rp[i] == bcol) {
590           *v++ = ap[bs2*i+bs*cidx+ridx];
591           goto finished;
592         }
593       }
594       *v++ = zero;
595       finished:;
596     }
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
604 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
605 {
606   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
607   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
608   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
609   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
610   PetscTruth      roworiented=a->roworiented;
611   const MatScalar *value = v;
612   MatScalar       *ap,*aa = a->a,*bap;
613 
614   PetscFunctionBegin;
615   if (roworiented) {
616     stepval = (n-1)*bs;
617   } else {
618     stepval = (m-1)*bs;
619   }
620   for (k=0; k<m; k++) { /* loop over added rows */
621     row  = im[k];
622     if (row < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
625 #endif
626     rp   = aj + ai[row];
627     ap   = aa + bs2*ai[row];
628     rmax = imax[row];
629     nrow = ailen[row];
630     low  = 0;
631     for (l=0; l<n; l++) { /* loop over added columns */
632       if (in[l] < 0) continue;
633       col = in[l];
634 #if defined(PETSC_USE_BOPT_g)
635       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
636 #endif
637       if (col < row) continue; /* ignore lower triangular block */
638       if (roworiented) {
639         value = v + k*(stepval+bs)*bs + l*bs;
640       } else {
641         value = v + l*(stepval+bs)*bs + k*bs;
642       }
643       if (!sorted) low = 0; high = nrow;
644       while (high-low > 7) {
645         t = (low+high)/2;
646         if (rp[t] > col) high = t;
647         else             low  = t;
648       }
649       for (i=low; i<high; i++) {
650         if (rp[i] > col) break;
651         if (rp[i] == col) {
652           bap  = ap +  bs2*i;
653           if (roworiented) {
654             if (is == ADD_VALUES) {
655               for (ii=0; ii<bs; ii++,value+=stepval) {
656                 for (jj=ii; jj<bs2; jj+=bs) {
657                   bap[jj] += *value++;
658                 }
659               }
660             } else {
661               for (ii=0; ii<bs; ii++,value+=stepval) {
662                 for (jj=ii; jj<bs2; jj+=bs) {
663                   bap[jj] = *value++;
664                 }
665               }
666             }
667           } else {
668             if (is == ADD_VALUES) {
669               for (ii=0; ii<bs; ii++,value+=stepval) {
670                 for (jj=0; jj<bs; jj++) {
671                   *bap++ += *value++;
672                 }
673               }
674             } else {
675               for (ii=0; ii<bs; ii++,value+=stepval) {
676                 for (jj=0; jj<bs; jj++) {
677                   *bap++  = *value++;
678                 }
679               }
680             }
681           }
682           goto noinsert2;
683         }
684       }
685       if (nonew == 1) goto noinsert2;
686       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
687       if (nrow >= rmax) {
688         /* there is no extra room in row, therefore enlarge */
689         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
690         MatScalar *new_a;
691 
692         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
693 
694         /* malloc new storage space */
695         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
696 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
697         new_j   = (int*)(new_a + bs2*new_nz);
698         new_i   = new_j + new_nz;
699 
700         /* copy over old data into new slots */
701         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
702         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
703         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
704         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
705         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
706         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
707         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
708         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
709         /* free up old matrix storage */
710         ierr = PetscFree(a->a);CHKERRQ(ierr);
711         if (!a->singlemalloc) {
712           ierr = PetscFree(a->i);CHKERRQ(ierr);
713           ierr = PetscFree(a->j);CHKERRQ(ierr);
714         }
715         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
716         a->singlemalloc = PETSC_TRUE;
717 
718         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
719         rmax = imax[row] = imax[row] + CHUNKSIZE;
720         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
721         a->s_maxnz += bs2*CHUNKSIZE;
722         a->reallocs++;
723         a->s_nz++;
724       }
725       N = nrow++ - 1;
726       /* shift up all the later entries in this row */
727       for (ii=N; ii>=i; ii--) {
728         rp[ii+1] = rp[ii];
729         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
730       }
731       if (N >= i) {
732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
733       }
734       rp[i] = col;
735       bap   = ap +  bs2*i;
736       if (roworiented) {
737         for (ii=0; ii<bs; ii++,value+=stepval) {
738           for (jj=ii; jj<bs2; jj+=bs) {
739             bap[jj] = *value++;
740           }
741         }
742       } else {
743         for (ii=0; ii<bs; ii++,value+=stepval) {
744           for (jj=0; jj<bs; jj++) {
745             *bap++  = *value++;
746           }
747         }
748       }
749       noinsert2:;
750       low = i;
751     }
752     ailen[row] = nrow;
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
759 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
760 {
761   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
762   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
763   int        m = A->m,*ip,N,*ailen = a->ilen;
764   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
765   MatScalar  *aa = a->a,*ap;
766 
767   PetscFunctionBegin;
768   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
769 
770   if (m) rmax = ailen[0];
771   for (i=1; i<mbs; i++) {
772     /* move each row back by the amount of empty slots (fshift) before it*/
773     fshift += imax[i-1] - ailen[i-1];
774     rmax   = PetscMax(rmax,ailen[i]);
775     if (fshift) {
776       ip = aj + ai[i]; ap = aa + bs2*ai[i];
777       N = ailen[i];
778       for (j=0; j<N; j++) {
779         ip[j-fshift] = ip[j];
780         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
781       }
782     }
783     ai[i] = ai[i-1] + ailen[i-1];
784   }
785   if (mbs) {
786     fshift += imax[mbs-1] - ailen[mbs-1];
787     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
788   }
789   /* reset ilen and imax for each row */
790   for (i=0; i<mbs; i++) {
791     ailen[i] = imax[i] = ai[i+1] - ai[i];
792   }
793   a->s_nz = ai[mbs];
794 
795   /* diagonals may have moved, reset it */
796   if (a->diag) {
797     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
798   }
799   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
800            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
801   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
802            a->reallocs);
803   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
804   a->reallocs          = 0;
805   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
806 
807   PetscFunctionReturn(0);
808 }
809 
810 /*
811    This function returns an array of flags which indicate the locations of contiguous
812    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
813    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
814    Assume: sizes should be long enough to hold all the values.
815 */
816 #undef __FUNCT__
817 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
818 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
819 {
820   int        i,j,k,row;
821   PetscTruth flg;
822 
823   PetscFunctionBegin;
824   for (i=0,j=0; i<n; j++) {
825     row = idx[i];
826     if (row%bs!=0) { /* Not the begining of a block */
827       sizes[j] = 1;
828       i++;
829     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
830       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
831       i++;
832     } else { /* Begining of the block, so check if the complete block exists */
833       flg = PETSC_TRUE;
834       for (k=1; k<bs; k++) {
835         if (row+k != idx[i+k]) { /* break in the block */
836           flg = PETSC_FALSE;
837           break;
838         }
839       }
840       if (flg == PETSC_TRUE) { /* No break in the bs */
841         sizes[j] = bs;
842         i+= bs;
843       } else {
844         sizes[j] = 1;
845         i++;
846       }
847     }
848   }
849   *bs_max = j;
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
855 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
856 {
857   PetscFunctionBegin;
858   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
859 }
860 
861 /* Only add/insert a(i,j) with i<=j (blocks).
862    Any a(i,j) with i>j input by user is ingored.
863 */
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
867 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
868 {
869   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
870   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
871   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
872   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
873   int         ridx,cidx,bs2=a->bs2,ierr;
874   MatScalar   *ap,value,*aa=a->a,*bap;
875 
876   PetscFunctionBegin;
877 
878   for (k=0; k<m; k++) { /* loop over added rows */
879     row  = im[k];       /* row number */
880     brow = row/bs;      /* block row number */
881     if (row < 0) continue;
882 #if defined(PETSC_USE_BOPT_g)
883     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
884 #endif
885     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
886     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
887     rmax = imax[brow];  /* maximum space allocated for this row */
888     nrow = ailen[brow]; /* actual length of this row */
889     low  = 0;
890 
891     for (l=0; l<n; l++) { /* loop over added columns */
892       if (in[l] < 0) continue;
893 #if defined(PETSC_USE_BOPT_g)
894       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
895 #endif
896       col = in[l];
897       bcol = col/bs;              /* block col number */
898 
899       if (brow <= bcol){
900         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
901         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
902           /* element value a(k,l) */
903           if (roworiented) {
904             value = v[l + k*n];
905           } else {
906             value = v[k + l*m];
907           }
908 
909           /* move pointer bap to a(k,l) quickly and add/insert value */
910           if (!sorted) low = 0; high = nrow;
911           while (high-low > 7) {
912             t = (low+high)/2;
913             if (rp[t] > bcol) high = t;
914             else              low  = t;
915           }
916           for (i=low; i<high; i++) {
917             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
918             if (rp[i] > bcol) break;
919             if (rp[i] == bcol) {
920               bap  = ap +  bs2*i + bs*cidx + ridx;
921               if (is == ADD_VALUES) *bap += value;
922               else                  *bap  = value;
923               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
924               if (brow == bcol && ridx < cidx){
925                 bap  = ap +  bs2*i + bs*ridx + cidx;
926                 if (is == ADD_VALUES) *bap += value;
927                 else                  *bap  = value;
928               }
929               goto noinsert1;
930             }
931           }
932 
933       if (nonew == 1) goto noinsert1;
934       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
935       if (nrow >= rmax) {
936         /* there is no extra room in row, therefore enlarge */
937         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
938         MatScalar *new_a;
939 
940         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
941 
942         /* Malloc new storage space */
943         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
944         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
945         new_j = (int*)(new_a + bs2*new_nz);
946         new_i = new_j + new_nz;
947 
948         /* copy over old data into new slots */
949         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
950         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
951         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
952         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
953         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
954         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
955         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
956         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
957         /* free up old matrix storage */
958         ierr = PetscFree(a->a);CHKERRQ(ierr);
959         if (!a->singlemalloc) {
960           ierr = PetscFree(a->i);CHKERRQ(ierr);
961           ierr = PetscFree(a->j);CHKERRQ(ierr);
962         }
963         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
964         a->singlemalloc = PETSC_TRUE;
965 
966         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
967         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
968         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
969         a->s_maxnz += bs2*CHUNKSIZE;
970         a->reallocs++;
971         a->s_nz++;
972       }
973 
974       N = nrow++ - 1;
975       /* shift up all the later entries in this row */
976       for (ii=N; ii>=i; ii--) {
977         rp[ii+1] = rp[ii];
978         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
979       }
980       if (N>=i) {
981         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
982       }
983       rp[i]                      = bcol;
984       ap[bs2*i + bs*cidx + ridx] = value;
985       noinsert1:;
986       low = i;
987       /* } */
988         }
989       } /* end of if .. if..  */
990     }                     /* end of loop over added columns */
991     ailen[brow] = nrow;
992   }                       /* end of loop over added rows */
993 
994   PetscFunctionReturn(0);
995 }
996 
997 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
998 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
999 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
1000 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1001 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
1002 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1003 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1004 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
1005 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1006 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1007 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1008 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1009 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1010 extern int MatZeroEntries_SeqSBAIJ(Mat);
1011 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1012 
1013 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1028 
1029 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1030 
1031 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1038 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1039 
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1048 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1049 
1050 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1056 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1057 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1058 
1059 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1065 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1066 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1067 
1068 #ifdef HAVE_MatICCFactor
1069 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1072 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1073 {
1074   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1075   Mat         outA;
1076   int         ierr;
1077   PetscTruth  row_identity,col_identity;
1078 
1079   PetscFunctionBegin;
1080   /*
1081   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1082   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1083   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1084   if (!row_identity || !col_identity) {
1085     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1086   }
1087   */
1088 
1089   outA          = inA;
1090   inA->factor   = FACTOR_CHOLESKY;
1091 
1092   if (!a->diag) {
1093     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1094   }
1095   /*
1096       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1097       for ILU(0) factorization with natural ordering
1098   */
1099   switch (a->bs) {
1100   case 1:
1101     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1102     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1103     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1104     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1105   case 2:
1106     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1107     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1108     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1109     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1110     break;
1111   case 3:
1112     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1113     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1114     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1115     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1116     break;
1117   case 4:
1118     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1119     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1120     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1121     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1122     break;
1123   case 5:
1124     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1125     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1126     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1127     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1128     break;
1129   case 6:
1130     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1132     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1133     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1134     break;
1135   case 7:
1136     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1137     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1138     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1139     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1140     break;
1141   default:
1142     a->row        = row;
1143     a->icol       = col;
1144     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1145     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1146 
1147     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1148     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1149     PetscLogObjectParent(inA,a->icol);
1150 
1151     if (!a->solve_work) {
1152       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1153       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1154     }
1155   }
1156 
1157   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 #endif
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1165 int MatPrintHelp_SeqSBAIJ(Mat A)
1166 {
1167   static PetscTruth called = PETSC_FALSE;
1168   MPI_Comm          comm = A->comm;
1169   int               ierr;
1170 
1171   PetscFunctionBegin;
1172   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1173   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1174   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 EXTERN_C_BEGIN
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1181 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1182 {
1183   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1184   int         i,nz,n;
1185 
1186   PetscFunctionBegin;
1187   nz = baij->s_maxnz;
1188   n  = mat->n;
1189   for (i=0; i<nz; i++) {
1190     baij->j[i] = indices[i];
1191   }
1192   baij->s_nz = nz;
1193   for (i=0; i<n; i++) {
1194     baij->ilen[i] = baij->imax[i];
1195   }
1196 
1197   PetscFunctionReturn(0);
1198 }
1199 EXTERN_C_END
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1203 /*@
1204     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1205        in the matrix.
1206 
1207   Input Parameters:
1208 +  mat     - the SeqSBAIJ matrix
1209 -  indices - the column indices
1210 
1211   Level: advanced
1212 
1213   Notes:
1214     This can be called if you have precomputed the nonzero structure of the
1215   matrix and want to provide it to the matrix object to improve the performance
1216   of the MatSetValues() operation.
1217 
1218     You MUST have set the correct numbers of nonzeros per row in the call to
1219   MatCreateSeqSBAIJ().
1220 
1221     MUST be called before any calls to MatSetValues()
1222 
1223 .seealso: MatCreateSeqSBAIJ
1224 @*/
1225 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1226 {
1227   int ierr,(*f)(Mat,int *);
1228 
1229   PetscFunctionBegin;
1230   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1231   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1232   if (f) {
1233     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1234   } else {
1235     SETERRQ(1,"Wrong type of matrix to set column indices");
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1242 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1243 {
1244   int        ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1253 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1256   PetscFunctionBegin;
1257   *array = a->a;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1263 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1264 {
1265   PetscFunctionBegin;
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include "petscblaslapack.h"
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1272 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1273 {
1274   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1275   int          ierr,one=1,i,bs=y->bs,bs2,j;
1276 
1277   PetscFunctionBegin;
1278   if (str == SAME_NONZERO_PATTERN) {
1279     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1280   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1281     if (y->xtoy && y->XtoY != X) {
1282       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1283       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1284     }
1285     if (!y->xtoy) { /* get xtoy */
1286       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1287       y->XtoY = X;
1288     }
1289     bs2 = bs*bs;
1290     for (i=0; i<x->s_nz; i++) {
1291       j = 0;
1292       while (j < bs2){
1293         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1294         j++;
1295       }
1296     }
1297     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1298   } else {
1299     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1306 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1307 {
1308   PetscFunctionBegin;
1309   *flg = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1315 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1316 {
1317   PetscFunctionBegin;
1318   *flg = PETSC_TRUE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1324 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1325 {
1326   PetscFunctionBegin;
1327   *flg = PETSC_FALSE;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* -------------------------------------------------------------------*/
1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1333        MatGetRow_SeqSBAIJ,
1334        MatRestoreRow_SeqSBAIJ,
1335        MatMult_SeqSBAIJ_N,
1336 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1337        MatMultTranspose_SeqSBAIJ,
1338        MatMultTransposeAdd_SeqSBAIJ,
1339        MatSolve_SeqSBAIJ_N,
1340        0,
1341        0,
1342 /*10*/ 0,
1343        0,
1344        MatCholeskyFactor_SeqSBAIJ,
1345        MatRelax_SeqSBAIJ,
1346        MatTranspose_SeqSBAIJ,
1347 /*15*/ MatGetInfo_SeqSBAIJ,
1348        MatEqual_SeqSBAIJ,
1349        MatGetDiagonal_SeqSBAIJ,
1350        MatDiagonalScale_SeqSBAIJ,
1351        MatNorm_SeqSBAIJ,
1352 /*20*/ 0,
1353        MatAssemblyEnd_SeqSBAIJ,
1354        0,
1355        MatSetOption_SeqSBAIJ,
1356        MatZeroEntries_SeqSBAIJ,
1357 /*25*/ MatZeroRows_SeqSBAIJ,
1358        0,
1359        0,
1360        MatCholeskyFactorSymbolic_SeqSBAIJ,
1361        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1362 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1363        0,
1364        MatICCFactorSymbolic_SeqSBAIJ,
1365        MatGetArray_SeqSBAIJ,
1366        MatRestoreArray_SeqSBAIJ,
1367 /*35*/ MatDuplicate_SeqSBAIJ,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*40*/ MatAXPY_SeqSBAIJ,
1373        MatGetSubMatrices_SeqSBAIJ,
1374        MatIncreaseOverlap_SeqSBAIJ,
1375        MatGetValues_SeqSBAIJ,
1376        0,
1377 /*45*/ MatPrintHelp_SeqSBAIJ,
1378        MatScale_SeqSBAIJ,
1379        0,
1380        0,
1381        0,
1382 /*50*/ MatGetBlockSize_SeqSBAIJ,
1383        MatGetRowIJ_SeqSBAIJ,
1384        MatRestoreRowIJ_SeqSBAIJ,
1385        0,
1386        0,
1387 /*55*/ 0,
1388        0,
1389        0,
1390        0,
1391        MatSetValuesBlocked_SeqSBAIJ,
1392 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1393        0,
1394        0,
1395        MatGetPetscMaps_Petsc,
1396        0,
1397 /*65*/ 0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*70*/ MatGetRowMax_SeqSBAIJ,
1403        0,
1404        0,
1405        0,
1406        0,
1407 /*75*/ 0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*80*/ 0,
1413        0,
1414        0,
1415 #if !defined(PETSC_USE_COMPLEX)
1416        MatGetInertia_SeqSBAIJ,
1417 #else
1418        0,
1419 #endif
1420 /*85*/ MatLoad_SeqSBAIJ,
1421        MatIsSymmetric_SeqSBAIJ,
1422        MatIsStructurallySymmetric_SeqSBAIJ,
1423        MatIsHermitian_SeqSBAIJ
1424 };
1425 
1426 EXTERN_C_BEGIN
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1429 int MatStoreValues_SeqSBAIJ(Mat mat)
1430 {
1431   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1432   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1433   int         ierr;
1434 
1435   PetscFunctionBegin;
1436   if (aij->nonew != 1) {
1437     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1438   }
1439 
1440   /* allocate space for values if not already there */
1441   if (!aij->saved_values) {
1442     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1443   }
1444 
1445   /* copy values over */
1446   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 EXTERN_C_END
1450 
1451 EXTERN_C_BEGIN
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1454 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1455 {
1456   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1457   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1458 
1459   PetscFunctionBegin;
1460   if (aij->nonew != 1) {
1461     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1462   }
1463   if (!aij->saved_values) {
1464     SETERRQ(1,"Must call MatStoreValues(A);first");
1465   }
1466 
1467   /* copy values over */
1468   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 EXTERN_C_END
1472 
1473 EXTERN_C_BEGIN
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1476 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1477 {
1478   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1479   int          i,len,ierr,mbs,bs2;
1480   PetscTruth   flg;
1481   int          s_nz;
1482 
1483   PetscFunctionBegin;
1484   B->preallocated = PETSC_TRUE;
1485   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1486   mbs  = B->m/bs;
1487   bs2  = bs*bs;
1488 
1489   if (mbs*bs != B->m) {
1490     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1491   }
1492 
1493   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1494   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1495   if (nnz) {
1496     for (i=0; i<mbs; i++) {
1497       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1498       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1499     }
1500   }
1501 
1502   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1503   if (!flg) {
1504     switch (bs) {
1505     case 1:
1506       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1507       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1508       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1509       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1510       B->ops->mult            = MatMult_SeqSBAIJ_1;
1511       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1512       break;
1513     case 2:
1514       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1515       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1516       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1517       B->ops->mult            = MatMult_SeqSBAIJ_2;
1518       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1519       break;
1520     case 3:
1521       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1522       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1523       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1524       B->ops->mult            = MatMult_SeqSBAIJ_3;
1525       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1526       break;
1527     case 4:
1528       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1529       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1530       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1531       B->ops->mult            = MatMult_SeqSBAIJ_4;
1532       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1533       break;
1534     case 5:
1535       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1536       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1537       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1538       B->ops->mult            = MatMult_SeqSBAIJ_5;
1539       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1540       break;
1541     case 6:
1542       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1543       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1544       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1545       B->ops->mult            = MatMult_SeqSBAIJ_6;
1546       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1547       break;
1548     case 7:
1549       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1550       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1551       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1552       B->ops->mult            = MatMult_SeqSBAIJ_7;
1553       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1554       break;
1555     }
1556   }
1557 
1558   b->mbs = mbs;
1559   b->nbs = mbs;
1560   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1561   if (!nnz) {
1562     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1563     else if (nz <= 0)        nz = 1;
1564     for (i=0; i<mbs; i++) {
1565       b->imax[i] = nz;
1566     }
1567     nz = nz*mbs; /* total nz */
1568   } else {
1569     nz = 0;
1570     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1571   }
1572   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1573   s_nz = nz;
1574 
1575   /* allocate the matrix space */
1576   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1577   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1578   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1579   b->j = (int*)(b->a + s_nz*bs2);
1580   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1581   b->i = b->j + s_nz;
1582   b->singlemalloc = PETSC_TRUE;
1583 
1584   /* pointer to beginning of each row */
1585   b->i[0] = 0;
1586   for (i=1; i<mbs+1; i++) {
1587     b->i[i] = b->i[i-1] + b->imax[i-1];
1588   }
1589 
1590   /* b->ilen will count nonzeros in each block row so far. */
1591   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1592   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1593   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1594 
1595   b->bs               = bs;
1596   b->bs2              = bs2;
1597   b->s_nz             = 0;
1598   b->s_maxnz          = s_nz*bs2;
1599 
1600   b->inew             = 0;
1601   b->jnew             = 0;
1602   b->anew             = 0;
1603   b->a2anew           = 0;
1604   b->permute          = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 /*MC
1610    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1611    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1612 
1613    Options Database Keys:
1614 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1615 
1616   Level: beginner
1617 
1618 .seealso: MatCreateSeqSBAIJ
1619 M*/
1620 
1621 EXTERN_C_BEGIN
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1624 int MatCreate_SeqSBAIJ(Mat B)
1625 {
1626   Mat_SeqSBAIJ *b;
1627   int          ierr,size;
1628 
1629   PetscFunctionBegin;
1630   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1631   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1632   B->m = B->M = PetscMax(B->m,B->M);
1633   B->n = B->N = PetscMax(B->n,B->N);
1634 
1635   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1636   B->data = (void*)b;
1637   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1638   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1639   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1640   B->ops->view        = MatView_SeqSBAIJ;
1641   B->factor           = 0;
1642   B->lupivotthreshold = 1.0;
1643   B->mapping          = 0;
1644   b->row              = 0;
1645   b->icol             = 0;
1646   b->reallocs         = 0;
1647   b->saved_values     = 0;
1648 
1649   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1650   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1651 
1652   b->sorted           = PETSC_FALSE;
1653   b->roworiented      = PETSC_TRUE;
1654   b->nonew            = 0;
1655   b->diag             = 0;
1656   b->solve_work       = 0;
1657   b->mult_work        = 0;
1658   B->spptr            = 0;
1659   b->keepzeroedrows   = PETSC_FALSE;
1660   b->xtoy             = 0;
1661   b->XtoY             = 0;
1662 
1663   b->inew             = 0;
1664   b->jnew             = 0;
1665   b->anew             = 0;
1666   b->a2anew           = 0;
1667   b->permute          = PETSC_FALSE;
1668 
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1670                                      "MatStoreValues_SeqSBAIJ",
1671                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1673                                      "MatRetrieveValues_SeqSBAIJ",
1674                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1676                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1677                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1679                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1680                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 EXTERN_C_END
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1687 /*@C
1688    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1689    compressed row) format.  For good matrix assembly performance the
1690    user should preallocate the matrix storage by setting the parameter nz
1691    (or the array nnz).  By setting these parameters accurately, performance
1692    during matrix assembly can be increased by more than a factor of 50.
1693 
1694    Collective on Mat
1695 
1696    Input Parameters:
1697 +  A - the symmetric matrix
1698 .  bs - size of block
1699 .  nz - number of block nonzeros per block row (same for all rows)
1700 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1701          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1702 
1703    Options Database Keys:
1704 .   -mat_no_unroll - uses code that does not unroll the loops in the
1705                      block calculations (much slower)
1706 .    -mat_block_size - size of the blocks to use
1707 
1708    Level: intermediate
1709 
1710    Notes:
1711    Specify the preallocated storage with either nz or nnz (not both).
1712    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1713    allocation.  For additional details, see the users manual chapter on
1714    matrices.
1715 
1716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1717 @*/
1718 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1719   int ierr,(*f)(Mat,int,int,const int[]);
1720 
1721   PetscFunctionBegin;
1722   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1723   if (f) {
1724     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCreateSeqSBAIJ"
1731 /*@C
1732    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1733    compressed row) format.  For good matrix assembly performance the
1734    user should preallocate the matrix storage by setting the parameter nz
1735    (or the array nnz).  By setting these parameters accurately, performance
1736    during matrix assembly can be increased by more than a factor of 50.
1737 
1738    Collective on MPI_Comm
1739 
1740    Input Parameters:
1741 +  comm - MPI communicator, set to PETSC_COMM_SELF
1742 .  bs - size of block
1743 .  m - number of rows, or number of columns
1744 .  nz - number of block nonzeros per block row (same for all rows)
1745 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1746          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1747 
1748    Output Parameter:
1749 .  A - the symmetric matrix
1750 
1751    Options Database Keys:
1752 .   -mat_no_unroll - uses code that does not unroll the loops in the
1753                      block calculations (much slower)
1754 .    -mat_block_size - size of the blocks to use
1755 
1756    Level: intermediate
1757 
1758    Notes:
1759 
1760    Specify the preallocated storage with either nz or nnz (not both).
1761    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1762    allocation.  For additional details, see the users manual chapter on
1763    matrices.
1764 
1765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1766 @*/
1767 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1768 {
1769   int ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1773   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1774   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1780 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1781 {
1782   Mat         C;
1783   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1784   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1785 
1786   PetscFunctionBegin;
1787   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1788 
1789   *B = 0;
1790   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1791   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1792   c    = (Mat_SeqSBAIJ*)C->data;
1793 
1794   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1795   C->preallocated   = PETSC_TRUE;
1796   C->factor         = A->factor;
1797   c->row            = 0;
1798   c->icol           = 0;
1799   c->saved_values   = 0;
1800   c->keepzeroedrows = a->keepzeroedrows;
1801   C->assembled      = PETSC_TRUE;
1802 
1803   c->bs         = a->bs;
1804   c->bs2        = a->bs2;
1805   c->mbs        = a->mbs;
1806   c->nbs        = a->nbs;
1807 
1808   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1809   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1810   for (i=0; i<mbs; i++) {
1811     c->imax[i] = a->imax[i];
1812     c->ilen[i] = a->ilen[i];
1813   }
1814 
1815   /* allocate the matrix space */
1816   c->singlemalloc = PETSC_TRUE;
1817   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1818   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1819   c->j = (int*)(c->a + nz*bs2);
1820   c->i = c->j + nz;
1821   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1822   if (mbs > 0) {
1823     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1824     if (cpvalues == MAT_COPY_VALUES) {
1825       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1826     } else {
1827       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1828     }
1829   }
1830 
1831   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1832   c->sorted      = a->sorted;
1833   c->roworiented = a->roworiented;
1834   c->nonew       = a->nonew;
1835 
1836   if (a->diag) {
1837     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1838     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1839     for (i=0; i<mbs; i++) {
1840       c->diag[i] = a->diag[i];
1841     }
1842   } else c->diag        = 0;
1843   c->s_nz               = a->s_nz;
1844   c->s_maxnz            = a->s_maxnz;
1845   c->solve_work         = 0;
1846   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1847   c->mult_work          = 0;
1848   *B = C;
1849   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1855 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1856 {
1857   Mat_SeqSBAIJ *a;
1858   Mat          B;
1859   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1860   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1861   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1862   int          *masked,nmask,tmp,bs2,ishift;
1863   PetscScalar  *aa;
1864   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1865 
1866   PetscFunctionBegin;
1867   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1868   bs2  = bs*bs;
1869 
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1872   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1873   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1874   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1875   M = header[1]; N = header[2]; nz = header[3];
1876 
1877   if (header[3] < 0) {
1878     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1879   }
1880 
1881   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1882 
1883   /*
1884      This code adds extra rows to make sure the number of rows is
1885     divisible by the blocksize
1886   */
1887   mbs        = M/bs;
1888   extra_rows = bs - M + bs*(mbs);
1889   if (extra_rows == bs) extra_rows = 0;
1890   else                  mbs++;
1891   if (extra_rows) {
1892     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1893   }
1894 
1895   /* read in row lengths */
1896   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1897   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1898   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1899 
1900   /* read in column indices */
1901   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1902   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1903   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1904 
1905   /* loop over row lengths determining block row lengths */
1906   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1907   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1908   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1909   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1910   masked   = mask + mbs;
1911   rowcount = 0; nzcount = 0;
1912   for (i=0; i<mbs; i++) {
1913     nmask = 0;
1914     for (j=0; j<bs; j++) {
1915       kmax = rowlengths[rowcount];
1916       for (k=0; k<kmax; k++) {
1917         tmp = jj[nzcount++]/bs;   /* block col. index */
1918         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1919       }
1920       rowcount++;
1921     }
1922     s_browlengths[i] += nmask;
1923 
1924     /* zero out the mask elements we set */
1925     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1926   }
1927 
1928   /* create our matrix */
1929   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1930   ierr = MatSetType(B,type);CHKERRQ(ierr);
1931   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1932   a = (Mat_SeqSBAIJ*)B->data;
1933 
1934   /* set matrix "i" values */
1935   a->i[0] = 0;
1936   for (i=1; i<= mbs; i++) {
1937     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1938     a->ilen[i-1] = s_browlengths[i-1];
1939   }
1940   a->s_nz = a->i[mbs];
1941 
1942   /* read in nonzero values */
1943   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1944   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1945   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1946 
1947   /* set "a" and "j" values into matrix */
1948   nzcount = 0; jcount = 0;
1949   for (i=0; i<mbs; i++) {
1950     nzcountb = nzcount;
1951     nmask    = 0;
1952     for (j=0; j<bs; j++) {
1953       kmax = rowlengths[i*bs+j];
1954       for (k=0; k<kmax; k++) {
1955         tmp = jj[nzcount++]/bs; /* block col. index */
1956         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1957       }
1958     }
1959     /* sort the masked values */
1960     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1961 
1962     /* set "j" values into matrix */
1963     maskcount = 1;
1964     for (j=0; j<nmask; j++) {
1965       a->j[jcount++]  = masked[j];
1966       mask[masked[j]] = maskcount++;
1967     }
1968 
1969     /* set "a" values into matrix */
1970     ishift = bs2*a->i[i];
1971     for (j=0; j<bs; j++) {
1972       kmax = rowlengths[i*bs+j];
1973       for (k=0; k<kmax; k++) {
1974         tmp       = jj[nzcountb]/bs ; /* block col. index */
1975         if (tmp >= i){
1976           block     = mask[tmp] - 1;
1977           point     = jj[nzcountb] - bs*tmp;
1978           idx       = ishift + bs2*block + j + bs*point;
1979           a->a[idx] = aa[nzcountb];
1980         }
1981         nzcountb++;
1982       }
1983     }
1984     /* zero out the mask elements we set */
1985     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1986   }
1987   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1988 
1989   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1990   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1991   ierr = PetscFree(aa);CHKERRQ(ierr);
1992   ierr = PetscFree(jj);CHKERRQ(ierr);
1993   ierr = PetscFree(mask);CHKERRQ(ierr);
1994 
1995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatView_Private(B);CHKERRQ(ierr);
1998   *A = B;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2004 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2005 {
2006   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2007   MatScalar    *aa=a->a,*v,*v1;
2008   PetscScalar  *x,*b,*t,sum,d;
2009   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
2010   int          nz,nz1,*vj,*vj1,i;
2011 
2012   PetscFunctionBegin;
2013   its = its*lits;
2014   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2015 
2016   if (bs > 1)
2017     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2018 
2019   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2020   if (xx != bb) {
2021     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2022   } else {
2023     b = x;
2024   }
2025 
2026   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2027 
2028   if (flag & SOR_ZERO_INITIAL_GUESS) {
2029     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2030       for (i=0; i<m; i++)
2031         t[i] = b[i];
2032 
2033       for (i=0; i<m; i++){
2034         d  = *(aa + ai[i]);  /* diag[i] */
2035         v  = aa + ai[i] + 1;
2036         vj = aj + ai[i] + 1;
2037         nz = ai[i+1] - ai[i] - 1;
2038         x[i] = omega*t[i]/d;
2039         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2040         PetscLogFlops(2*nz-1);
2041       }
2042     }
2043 
2044     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2045       for (i=0; i<m; i++)
2046         t[i] = b[i];
2047 
2048       for (i=0; i<m-1; i++){  /* update rhs */
2049         v  = aa + ai[i] + 1;
2050         vj = aj + ai[i] + 1;
2051         nz = ai[i+1] - ai[i] - 1;
2052         while (nz--) t[*vj++] -= x[i]*(*v++);
2053         PetscLogFlops(2*nz-1);
2054       }
2055       for (i=m-1; i>=0; i--){
2056         d  = *(aa + ai[i]);
2057         v  = aa + ai[i] + 1;
2058         vj = aj + ai[i] + 1;
2059         nz = ai[i+1] - ai[i] - 1;
2060         sum = t[i];
2061         while (nz--) sum -= x[*vj++]*(*v++);
2062         PetscLogFlops(2*nz-1);
2063         x[i] =   (1-omega)*x[i] + omega*sum/d;
2064       }
2065     }
2066     its--;
2067   }
2068 
2069   while (its--) {
2070     /*
2071        forward sweep:
2072        for i=0,...,m-1:
2073          sum[i] = (b[i] - U(i,:)x )/d[i];
2074          x[i]   = (1-omega)x[i] + omega*sum[i];
2075          b      = b - x[i]*U^T(i,:);
2076 
2077     */
2078     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2079       for (i=0; i<m; i++)
2080         t[i] = b[i];
2081 
2082       for (i=0; i<m; i++){
2083         d  = *(aa + ai[i]);  /* diag[i] */
2084         v  = aa + ai[i] + 1; v1=v;
2085         vj = aj + ai[i] + 1; vj1=vj;
2086         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2087         sum = t[i];
2088         while (nz1--) sum -= (*v1++)*x[*vj1++];
2089         x[i] = (1-omega)*x[i] + omega*sum/d;
2090         while (nz--) t[*vj++] -= x[i]*(*v++);
2091         PetscLogFlops(4*nz-2);
2092       }
2093     }
2094 
2095   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2096       /*
2097        backward sweep:
2098        b = b - x[i]*U^T(i,:), i=0,...,n-2
2099        for i=m-1,...,0:
2100          sum[i] = (b[i] - U(i,:)x )/d[i];
2101          x[i]   = (1-omega)x[i] + omega*sum[i];
2102       */
2103       for (i=0; i<m; i++)
2104         t[i] = b[i];
2105 
2106       for (i=0; i<m-1; i++){  /* update rhs */
2107         v  = aa + ai[i] + 1;
2108         vj = aj + ai[i] + 1;
2109         nz = ai[i+1] - ai[i] - 1;
2110         while (nz--) t[*vj++] -= x[i]*(*v++);
2111         PetscLogFlops(2*nz-1);
2112       }
2113       for (i=m-1; i>=0; i--){
2114         d  = *(aa + ai[i]);
2115         v  = aa + ai[i] + 1;
2116         vj = aj + ai[i] + 1;
2117         nz = ai[i+1] - ai[i] - 1;
2118         sum = t[i];
2119         while (nz--) sum -= x[*vj++]*(*v++);
2120         PetscLogFlops(2*nz-1);
2121         x[i] =   (1-omega)*x[i] + omega*sum/d;
2122       }
2123     }
2124   }
2125 
2126   ierr = PetscFree(t); CHKERRQ(ierr);
2127   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2128   if (bb != xx) {
2129     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2130   }
2131 
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 
2136 
2137 
2138 
2139 
2140