xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 0bad918300f840a56f43416dc3932aa97bd6fa72)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
560     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
565       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
595 {
596   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
597   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth      roworiented=a->roworiented;
601   const MatScalar *value = v;
602   MatScalar       *ap,*aa = a->a,*bap;
603 
604   PetscFunctionBegin;
605   if (roworiented) {
606     stepval = (n-1)*bs;
607   } else {
608     stepval = (m-1)*bs;
609   }
610   for (k=0; k<m; k++) { /* loop over added rows */
611     row  = im[k];
612     if (row < 0) continue;
613 #if defined(PETSC_USE_BOPT_g)
614     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
615 #endif
616     rp   = aj + ai[row];
617     ap   = aa + bs2*ai[row];
618     rmax = imax[row];
619     nrow = ailen[row];
620     low  = 0;
621     for (l=0; l<n; l++) { /* loop over added columns */
622       if (in[l] < 0) continue;
623       col = in[l];
624 #if defined(PETSC_USE_BOPT_g)
625       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
626 #endif
627       if (col < row) continue; /* ignore lower triangular block */
628       if (roworiented) {
629         value = v + k*(stepval+bs)*bs + l*bs;
630       } else {
631         value = v + l*(stepval+bs)*bs + k*bs;
632       }
633       if (!sorted) low = 0; high = nrow;
634       while (high-low > 7) {
635         t = (low+high)/2;
636         if (rp[t] > col) high = t;
637         else             low  = t;
638       }
639       for (i=low; i<high; i++) {
640         if (rp[i] > col) break;
641         if (rp[i] == col) {
642           bap  = ap +  bs2*i;
643           if (roworiented) {
644             if (is == ADD_VALUES) {
645               for (ii=0; ii<bs; ii++,value+=stepval) {
646                 for (jj=ii; jj<bs2; jj+=bs) {
647                   bap[jj] += *value++;
648                 }
649               }
650             } else {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=ii; jj<bs2; jj+=bs) {
653                   bap[jj] = *value++;
654                 }
655               }
656             }
657           } else {
658             if (is == ADD_VALUES) {
659               for (ii=0; ii<bs; ii++,value+=stepval) {
660                 for (jj=0; jj<bs; jj++) {
661                   *bap++ += *value++;
662                 }
663               }
664             } else {
665               for (ii=0; ii<bs; ii++,value+=stepval) {
666                 for (jj=0; jj<bs; jj++) {
667                   *bap++  = *value++;
668                 }
669               }
670             }
671           }
672           goto noinsert2;
673         }
674       }
675       if (nonew == 1) goto noinsert2;
676       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
677       if (nrow >= rmax) {
678         /* there is no extra room in row, therefore enlarge */
679         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
680         MatScalar *new_a;
681 
682         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
683 
684         /* malloc new storage space */
685         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
686 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
687         new_j   = (int*)(new_a + bs2*new_nz);
688         new_i   = new_j + new_nz;
689 
690         /* copy over old data into new slots */
691         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
692         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
693         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
694         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
695         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
698         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
699         /* free up old matrix storage */
700         ierr = PetscFree(a->a);CHKERRQ(ierr);
701         if (!a->singlemalloc) {
702           ierr = PetscFree(a->i);CHKERRQ(ierr);
703           ierr = PetscFree(a->j);CHKERRQ(ierr);
704         }
705         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
706         a->singlemalloc = PETSC_TRUE;
707 
708         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
709         rmax = imax[row] = imax[row] + CHUNKSIZE;
710         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
711         a->s_maxnz += bs2*CHUNKSIZE;
712         a->reallocs++;
713         a->s_nz++;
714       }
715       N = nrow++ - 1;
716       /* shift up all the later entries in this row */
717       for (ii=N; ii>=i; ii--) {
718         rp[ii+1] = rp[ii];
719         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
720       }
721       if (N >= i) {
722         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       rp[i] = col;
725       bap   = ap +  bs2*i;
726       if (roworiented) {
727         for (ii=0; ii<bs; ii++,value+=stepval) {
728           for (jj=ii; jj<bs2; jj+=bs) {
729             bap[jj] = *value++;
730           }
731         }
732       } else {
733         for (ii=0; ii<bs; ii++,value+=stepval) {
734           for (jj=0; jj<bs; jj++) {
735             *bap++  = *value++;
736           }
737         }
738       }
739       noinsert2:;
740       low = i;
741     }
742     ailen[row] = nrow;
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
750 {
751   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
753   int        m = A->m,*ip,N,*ailen = a->ilen;
754   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
755   MatScalar  *aa = a->a,*ap;
756 
757   PetscFunctionBegin;
758   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
759 
760   if (m) rmax = ailen[0];
761   for (i=1; i<mbs; i++) {
762     /* move each row back by the amount of empty slots (fshift) before it*/
763     fshift += imax[i-1] - ailen[i-1];
764     rmax   = PetscMax(rmax,ailen[i]);
765     if (fshift) {
766       ip = aj + ai[i]; ap = aa + bs2*ai[i];
767       N = ailen[i];
768       for (j=0; j<N; j++) {
769         ip[j-fshift] = ip[j];
770         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
771       }
772     }
773     ai[i] = ai[i-1] + ailen[i-1];
774   }
775   if (mbs) {
776     fshift += imax[mbs-1] - ailen[mbs-1];
777     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
778   }
779   /* reset ilen and imax for each row */
780   for (i=0; i<mbs; i++) {
781     ailen[i] = imax[i] = ai[i+1] - ai[i];
782   }
783   a->s_nz = ai[mbs];
784 
785   /* diagonals may have moved, reset it */
786   if (a->diag) {
787     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
788   }
789   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
790            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
791   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
792            a->reallocs);
793   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
794   a->reallocs          = 0;
795   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
796 
797   PetscFunctionReturn(0);
798 }
799 
800 /*
801    This function returns an array of flags which indicate the locations of contiguous
802    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
803    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
804    Assume: sizes should be long enough to hold all the values.
805 */
806 #undef __FUNCT__
807 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
808 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
809 {
810   int        i,j,k,row;
811   PetscTruth flg;
812 
813   PetscFunctionBegin;
814   for (i=0,j=0; i<n; j++) {
815     row = idx[i];
816     if (row%bs!=0) { /* Not the begining of a block */
817       sizes[j] = 1;
818       i++;
819     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
820       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
821       i++;
822     } else { /* Begining of the block, so check if the complete block exists */
823       flg = PETSC_TRUE;
824       for (k=1; k<bs; k++) {
825         if (row+k != idx[i+k]) { /* break in the block */
826           flg = PETSC_FALSE;
827           break;
828         }
829       }
830       if (flg == PETSC_TRUE) { /* No break in the bs */
831         sizes[j] = bs;
832         i+= bs;
833       } else {
834         sizes[j] = 1;
835         i++;
836       }
837     }
838   }
839   *bs_max = j;
840   PetscFunctionReturn(0);
841 }
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
845 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
846 {
847   PetscFunctionBegin;
848   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
849 }
850 
851 /* Only add/insert a(i,j) with i<=j (blocks).
852    Any a(i,j) with i>j input by user is ingored.
853 */
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
857 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
858 {
859   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
860   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
861   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
862   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
863   int         ridx,cidx,bs2=a->bs2,ierr;
864   MatScalar   *ap,value,*aa=a->a,*bap;
865 
866   PetscFunctionBegin;
867 
868   for (k=0; k<m; k++) { /* loop over added rows */
869     row  = im[k];       /* row number */
870     brow = row/bs;      /* block row number */
871     if (row < 0) continue;
872 #if defined(PETSC_USE_BOPT_g)
873     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
874 #endif
875     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
876     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
877     rmax = imax[brow];  /* maximum space allocated for this row */
878     nrow = ailen[brow]; /* actual length of this row */
879     low  = 0;
880 
881     for (l=0; l<n; l++) { /* loop over added columns */
882       if (in[l] < 0) continue;
883 #if defined(PETSC_USE_BOPT_g)
884       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
885 #endif
886       col = in[l];
887       bcol = col/bs;              /* block col number */
888 
889       if (brow <= bcol){
890         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
891         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
892           /* element value a(k,l) */
893           if (roworiented) {
894             value = v[l + k*n];
895           } else {
896             value = v[k + l*m];
897           }
898 
899           /* move pointer bap to a(k,l) quickly and add/insert value */
900           if (!sorted) low = 0; high = nrow;
901           while (high-low > 7) {
902             t = (low+high)/2;
903             if (rp[t] > bcol) high = t;
904             else              low  = t;
905           }
906           for (i=low; i<high; i++) {
907             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
908             if (rp[i] > bcol) break;
909             if (rp[i] == bcol) {
910               bap  = ap +  bs2*i + bs*cidx + ridx;
911               if (is == ADD_VALUES) *bap += value;
912               else                  *bap  = value;
913               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
914               if (brow == bcol && ridx < cidx){
915                 bap  = ap +  bs2*i + bs*ridx + cidx;
916                 if (is == ADD_VALUES) *bap += value;
917                 else                  *bap  = value;
918               }
919               goto noinsert1;
920             }
921           }
922 
923       if (nonew == 1) goto noinsert1;
924       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
925       if (nrow >= rmax) {
926         /* there is no extra room in row, therefore enlarge */
927         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
928         MatScalar *new_a;
929 
930         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931 
932         /* Malloc new storage space */
933         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
934         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
935         new_j = (int*)(new_a + bs2*new_nz);
936         new_i = new_j + new_nz;
937 
938         /* copy over old data into new slots */
939         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
940         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
941         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
942         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
943         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
944         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
945         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
946         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
947         /* free up old matrix storage */
948         ierr = PetscFree(a->a);CHKERRQ(ierr);
949         if (!a->singlemalloc) {
950           ierr = PetscFree(a->i);CHKERRQ(ierr);
951           ierr = PetscFree(a->j);CHKERRQ(ierr);
952         }
953         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
954         a->singlemalloc = PETSC_TRUE;
955 
956         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
957         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
958         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
959         a->s_maxnz += bs2*CHUNKSIZE;
960         a->reallocs++;
961         a->s_nz++;
962       }
963 
964       N = nrow++ - 1;
965       /* shift up all the later entries in this row */
966       for (ii=N; ii>=i; ii--) {
967         rp[ii+1] = rp[ii];
968         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
969       }
970       if (N>=i) {
971         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
972       }
973       rp[i]                      = bcol;
974       ap[bs2*i + bs*cidx + ridx] = value;
975       noinsert1:;
976       low = i;
977       /* } */
978         }
979       } /* end of if .. if..  */
980     }                     /* end of loop over added columns */
981     ailen[brow] = nrow;
982   }                       /* end of loop over added rows */
983 
984   PetscFunctionReturn(0);
985 }
986 
987 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
988 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
989 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
990 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
991 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
992 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
993 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
994 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
995 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
996 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
997 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
998 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1000 extern int MatZeroEntries_SeqSBAIJ(Mat);
1001 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1002 
1003 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1004 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1005 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1006 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1007 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1008 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1009 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1011 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1012 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1013 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1014 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1015 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1016 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1018 
1019 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1020 
1021 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1022 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1023 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1024 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1025 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1029 
1030 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1031 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1032 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1038 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1039 
1040 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1041 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1042 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1043 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1048 
1049 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1050 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1051 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1052 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1057 
1058 #ifdef HAVE_MatICCFactor
1059 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1062 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1063 {
1064   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1065   Mat         outA;
1066   int         ierr;
1067   PetscTruth  row_identity,col_identity;
1068 
1069   PetscFunctionBegin;
1070   /*
1071   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1072   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1073   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1074   if (!row_identity || !col_identity) {
1075     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1076   }
1077   */
1078 
1079   outA          = inA;
1080   inA->factor   = FACTOR_CHOLESKY;
1081 
1082   if (!a->diag) {
1083     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1084   }
1085   /*
1086       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1087       for ILU(0) factorization with natural ordering
1088   */
1089   switch (a->bs) {
1090   case 1:
1091     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1092     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1093     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1094     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1095   case 2:
1096     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1097     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1098     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1099     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1100     break;
1101   case 3:
1102     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1103     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1104     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1105     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1106     break;
1107   case 4:
1108     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1109     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1110     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1111     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1112     break;
1113   case 5:
1114     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1115     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1116     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1117     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1118     break;
1119   case 6:
1120     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1121     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1122     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1123     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1124     break;
1125   case 7:
1126     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1127     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1128     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1129     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1130     break;
1131   default:
1132     a->row        = row;
1133     a->icol       = col;
1134     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1135     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1136 
1137     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1138     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1139     PetscLogObjectParent(inA,a->icol);
1140 
1141     if (!a->solve_work) {
1142       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1143       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1144     }
1145   }
1146 
1147   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 #endif
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1155 int MatPrintHelp_SeqSBAIJ(Mat A)
1156 {
1157   static PetscTruth called = PETSC_FALSE;
1158   MPI_Comm          comm = A->comm;
1159   int               ierr;
1160 
1161   PetscFunctionBegin;
1162   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1163   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1164   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 EXTERN_C_BEGIN
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1171 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1172 {
1173   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1174   int         i,nz,n;
1175 
1176   PetscFunctionBegin;
1177   nz = baij->s_maxnz;
1178   n  = mat->n;
1179   for (i=0; i<nz; i++) {
1180     baij->j[i] = indices[i];
1181   }
1182   baij->s_nz = nz;
1183   for (i=0; i<n; i++) {
1184     baij->ilen[i] = baij->imax[i];
1185   }
1186 
1187   PetscFunctionReturn(0);
1188 }
1189 EXTERN_C_END
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1193 /*@
1194     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1195        in the matrix.
1196 
1197   Input Parameters:
1198 +  mat     - the SeqSBAIJ matrix
1199 -  indices - the column indices
1200 
1201   Level: advanced
1202 
1203   Notes:
1204     This can be called if you have precomputed the nonzero structure of the
1205   matrix and want to provide it to the matrix object to improve the performance
1206   of the MatSetValues() operation.
1207 
1208     You MUST have set the correct numbers of nonzeros per row in the call to
1209   MatCreateSeqSBAIJ().
1210 
1211     MUST be called before any calls to MatSetValues()
1212 
1213 .seealso: MatCreateSeqSBAIJ
1214 @*/
1215 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1216 {
1217   int ierr,(*f)(Mat,int *);
1218 
1219   PetscFunctionBegin;
1220   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1221   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1222   if (f) {
1223     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1224   } else {
1225     SETERRQ(1,"Wrong type of matrix to set column indices");
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1232 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1233 {
1234   int        ierr;
1235 
1236   PetscFunctionBegin;
1237   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1243 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1244 {
1245   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1246   PetscFunctionBegin;
1247   *array = a->a;
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1253 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   PetscFunctionBegin;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #include "petscblaslapack.h"
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1262 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1263 {
1264   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1265   int          ierr,one=1,i,bs=y->bs,bs2,j;
1266 
1267   PetscFunctionBegin;
1268   if (str == SAME_NONZERO_PATTERN) {
1269     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1270   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1271     if (y->xtoy && y->XtoY != X) {
1272       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1273       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1274     }
1275     if (!y->xtoy) { /* get xtoy */
1276       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1277       y->XtoY = X;
1278     }
1279     bs2 = bs*bs;
1280     for (i=0; i<x->s_nz; i++) {
1281       j = 0;
1282       while (j < bs2){
1283         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1284         j++;
1285       }
1286     }
1287     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1288   } else {
1289     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 /* -------------------------------------------------------------------*/
1295 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1296        MatGetRow_SeqSBAIJ,
1297        MatRestoreRow_SeqSBAIJ,
1298        MatMult_SeqSBAIJ_N,
1299        MatMultAdd_SeqSBAIJ_N,
1300        MatMultTranspose_SeqSBAIJ,
1301        MatMultTransposeAdd_SeqSBAIJ,
1302        MatSolve_SeqSBAIJ_N,
1303        0,
1304        0,
1305        0,
1306        0,
1307        MatCholeskyFactor_SeqSBAIJ,
1308        MatRelax_SeqSBAIJ,
1309        MatTranspose_SeqSBAIJ,
1310        MatGetInfo_SeqSBAIJ,
1311        MatEqual_SeqSBAIJ,
1312        MatGetDiagonal_SeqSBAIJ,
1313        MatDiagonalScale_SeqSBAIJ,
1314        MatNorm_SeqSBAIJ,
1315        0,
1316        MatAssemblyEnd_SeqSBAIJ,
1317        0,
1318        MatSetOption_SeqSBAIJ,
1319        MatZeroEntries_SeqSBAIJ,
1320        MatZeroRows_SeqSBAIJ,
1321        0,
1322        0,
1323        MatCholeskyFactorSymbolic_SeqSBAIJ,
1324        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1325        MatSetUpPreallocation_SeqSBAIJ,
1326        0,
1327        MatICCFactorSymbolic_SeqSBAIJ,
1328        MatGetArray_SeqSBAIJ,
1329        MatRestoreArray_SeqSBAIJ,
1330        MatDuplicate_SeqSBAIJ,
1331        0,
1332        0,
1333        0,
1334        0,
1335        MatAXPY_SeqSBAIJ,
1336        MatGetSubMatrices_SeqSBAIJ,
1337        MatIncreaseOverlap_SeqSBAIJ,
1338        MatGetValues_SeqSBAIJ,
1339        0,
1340        MatPrintHelp_SeqSBAIJ,
1341        MatScale_SeqSBAIJ,
1342        0,
1343        0,
1344        0,
1345        MatGetBlockSize_SeqSBAIJ,
1346        MatGetRowIJ_SeqSBAIJ,
1347        MatRestoreRowIJ_SeqSBAIJ,
1348        0,
1349        0,
1350        0,
1351        0,
1352        0,
1353        0,
1354        MatSetValuesBlocked_SeqSBAIJ,
1355        MatGetSubMatrix_SeqSBAIJ,
1356        0,
1357        0,
1358        MatGetPetscMaps_Petsc,
1359        0,
1360        0,
1361        0,
1362        0,
1363        0,
1364        0,
1365        MatGetRowMax_SeqSBAIJ,
1366        0,
1367        0,
1368        0,
1369        0,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379 #if !defined(PETSC_USE_COMPLEX)
1380        MatGetInertia_SeqSBAIJ
1381 #else
1382        0
1383 #endif
1384 };
1385 
1386 EXTERN_C_BEGIN
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1389 int MatStoreValues_SeqSBAIJ(Mat mat)
1390 {
1391   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1392   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1393   int         ierr;
1394 
1395   PetscFunctionBegin;
1396   if (aij->nonew != 1) {
1397     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1398   }
1399 
1400   /* allocate space for values if not already there */
1401   if (!aij->saved_values) {
1402     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1403   }
1404 
1405   /* copy values over */
1406   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 EXTERN_C_END
1410 
1411 EXTERN_C_BEGIN
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1414 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1415 {
1416   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1417   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1418 
1419   PetscFunctionBegin;
1420   if (aij->nonew != 1) {
1421     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1422   }
1423   if (!aij->saved_values) {
1424     SETERRQ(1,"Must call MatStoreValues(A);first");
1425   }
1426 
1427   /* copy values over */
1428   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 EXTERN_C_END
1432 
1433 EXTERN_C_BEGIN
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1436 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1437 {
1438   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1439   int          i,len,ierr,mbs,bs2;
1440   PetscTruth   flg;
1441   int          s_nz;
1442 
1443   PetscFunctionBegin;
1444   B->preallocated = PETSC_TRUE;
1445   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1446   mbs  = B->m/bs;
1447   bs2  = bs*bs;
1448 
1449   if (mbs*bs != B->m) {
1450     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1451   }
1452 
1453   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1454   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1455   if (nnz) {
1456     for (i=0; i<mbs; i++) {
1457       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1458       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1459     }
1460   }
1461 
1462   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1463   if (!flg) {
1464     switch (bs) {
1465     case 1:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1468       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1469       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1470       B->ops->mult            = MatMult_SeqSBAIJ_1;
1471       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1472       break;
1473     case 2:
1474       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1475       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1476       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1477       B->ops->mult            = MatMult_SeqSBAIJ_2;
1478       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1479       break;
1480     case 3:
1481       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1482       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1483       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1484       B->ops->mult            = MatMult_SeqSBAIJ_3;
1485       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1486       break;
1487     case 4:
1488       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1489       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1490       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1491       B->ops->mult            = MatMult_SeqSBAIJ_4;
1492       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1493       break;
1494     case 5:
1495       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1496       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1497       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1498       B->ops->mult            = MatMult_SeqSBAIJ_5;
1499       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1500       break;
1501     case 6:
1502       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1503       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1504       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1505       B->ops->mult            = MatMult_SeqSBAIJ_6;
1506       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1507       break;
1508     case 7:
1509       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1510       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1511       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1512       B->ops->mult            = MatMult_SeqSBAIJ_7;
1513       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1514       break;
1515     }
1516   }
1517 
1518   b->mbs = mbs;
1519   b->nbs = mbs;
1520   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1521   if (!nnz) {
1522     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1523     else if (nz <= 0)        nz = 1;
1524     for (i=0; i<mbs; i++) {
1525       b->imax[i] = nz;
1526     }
1527     nz = nz*mbs; /* total nz */
1528   } else {
1529     nz = 0;
1530     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1531   }
1532   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1533   s_nz = nz;
1534 
1535   /* allocate the matrix space */
1536   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1537   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1538   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1539   b->j = (int*)(b->a + s_nz*bs2);
1540   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1541   b->i = b->j + s_nz;
1542   b->singlemalloc = PETSC_TRUE;
1543 
1544   /* pointer to beginning of each row */
1545   b->i[0] = 0;
1546   for (i=1; i<mbs+1; i++) {
1547     b->i[i] = b->i[i-1] + b->imax[i-1];
1548   }
1549 
1550   /* b->ilen will count nonzeros in each block row so far. */
1551   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1552   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1553   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1554 
1555   b->bs               = bs;
1556   b->bs2              = bs2;
1557   b->s_nz             = 0;
1558   b->s_maxnz          = s_nz*bs2;
1559 
1560   b->inew             = 0;
1561   b->jnew             = 0;
1562   b->anew             = 0;
1563   b->a2anew           = 0;
1564   b->permute          = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 EXTERN_C_END
1568 
1569 /*MC
1570    MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1571    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1572 
1573    Options Database Keys:
1574 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1575 
1576   Level: beginner
1577 
1578 .seealso: MatCreateSeqSBAIJ
1579 M*/
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1584 int MatCreate_SeqSBAIJ(Mat B)
1585 {
1586   Mat_SeqSBAIJ *b;
1587   int          ierr,size;
1588 
1589   PetscFunctionBegin;
1590   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1591   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1592   B->m = B->M = PetscMax(B->m,B->M);
1593   B->n = B->N = PetscMax(B->n,B->N);
1594 
1595   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1596   B->data = (void*)b;
1597   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1598   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1599   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1600   B->ops->view        = MatView_SeqSBAIJ;
1601   B->factor           = 0;
1602   B->lupivotthreshold = 1.0;
1603   B->mapping          = 0;
1604   b->row              = 0;
1605   b->icol             = 0;
1606   b->reallocs         = 0;
1607   b->saved_values     = 0;
1608 
1609   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1610   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1611 
1612   b->sorted           = PETSC_FALSE;
1613   b->roworiented      = PETSC_TRUE;
1614   b->nonew            = 0;
1615   b->diag             = 0;
1616   b->solve_work       = 0;
1617   b->mult_work        = 0;
1618   B->spptr            = 0;
1619   b->keepzeroedrows   = PETSC_FALSE;
1620   b->xtoy             = 0;
1621   b->XtoY             = 0;
1622 
1623   b->inew             = 0;
1624   b->jnew             = 0;
1625   b->anew             = 0;
1626   b->a2anew           = 0;
1627   b->permute          = PETSC_FALSE;
1628 
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1630                                      "MatStoreValues_SeqSBAIJ",
1631                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1633                                      "MatRetrieveValues_SeqSBAIJ",
1634                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1636                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1637                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1639                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1640                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1641   PetscFunctionReturn(0);
1642 }
1643 EXTERN_C_END
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1647 /*@C
1648    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1649    compressed row) format.  For good matrix assembly performance the
1650    user should preallocate the matrix storage by setting the parameter nz
1651    (or the array nnz).  By setting these parameters accurately, performance
1652    during matrix assembly can be increased by more than a factor of 50.
1653 
1654    Collective on Mat
1655 
1656    Input Parameters:
1657 +  A - the symmetric matrix
1658 .  bs - size of block
1659 .  nz - number of block nonzeros per block row (same for all rows)
1660 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1661          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1662 
1663    Options Database Keys:
1664 .   -mat_no_unroll - uses code that does not unroll the loops in the
1665                      block calculations (much slower)
1666 .    -mat_block_size - size of the blocks to use
1667 
1668    Level: intermediate
1669 
1670    Notes:
1671    Specify the preallocated storage with either nz or nnz (not both).
1672    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1673    allocation.  For additional details, see the users manual chapter on
1674    matrices.
1675 
1676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1677 @*/
1678 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1679   int ierr,(*f)(Mat,int,int,const int[]);
1680 
1681   PetscFunctionBegin;
1682   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1683   if (f) {
1684     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatCreateSeqSBAIJ"
1691 /*@C
1692    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1693    compressed row) format.  For good matrix assembly performance the
1694    user should preallocate the matrix storage by setting the parameter nz
1695    (or the array nnz).  By setting these parameters accurately, performance
1696    during matrix assembly can be increased by more than a factor of 50.
1697 
1698    Collective on MPI_Comm
1699 
1700    Input Parameters:
1701 +  comm - MPI communicator, set to PETSC_COMM_SELF
1702 .  bs - size of block
1703 .  m - number of rows, or number of columns
1704 .  nz - number of block nonzeros per block row (same for all rows)
1705 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1706          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1707 
1708    Output Parameter:
1709 .  A - the symmetric matrix
1710 
1711    Options Database Keys:
1712 .   -mat_no_unroll - uses code that does not unroll the loops in the
1713                      block calculations (much slower)
1714 .    -mat_block_size - size of the blocks to use
1715 
1716    Level: intermediate
1717 
1718    Notes:
1719 
1720    Specify the preallocated storage with either nz or nnz (not both).
1721    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1722    allocation.  For additional details, see the users manual chapter on
1723    matrices.
1724 
1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1726 @*/
1727 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1728 {
1729   int ierr;
1730 
1731   PetscFunctionBegin;
1732   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1733   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1734   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1740 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1741 {
1742   Mat         C;
1743   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1744   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1745 
1746   PetscFunctionBegin;
1747   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1748 
1749   *B = 0;
1750   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1751   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1752   c    = (Mat_SeqSBAIJ*)C->data;
1753 
1754   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1755   C->preallocated   = PETSC_TRUE;
1756   C->factor         = A->factor;
1757   c->row            = 0;
1758   c->icol           = 0;
1759   c->saved_values   = 0;
1760   c->keepzeroedrows = a->keepzeroedrows;
1761   C->assembled      = PETSC_TRUE;
1762 
1763   c->bs         = a->bs;
1764   c->bs2        = a->bs2;
1765   c->mbs        = a->mbs;
1766   c->nbs        = a->nbs;
1767 
1768   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1770   for (i=0; i<mbs; i++) {
1771     c->imax[i] = a->imax[i];
1772     c->ilen[i] = a->ilen[i];
1773   }
1774 
1775   /* allocate the matrix space */
1776   c->singlemalloc = PETSC_TRUE;
1777   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1778   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1779   c->j = (int*)(c->a + nz*bs2);
1780   c->i = c->j + nz;
1781   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1782   if (mbs > 0) {
1783     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1784     if (cpvalues == MAT_COPY_VALUES) {
1785       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     } else {
1787       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     }
1789   }
1790 
1791   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1792   c->sorted      = a->sorted;
1793   c->roworiented = a->roworiented;
1794   c->nonew       = a->nonew;
1795 
1796   if (a->diag) {
1797     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1798     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1799     for (i=0; i<mbs; i++) {
1800       c->diag[i] = a->diag[i];
1801     }
1802   } else c->diag        = 0;
1803   c->s_nz               = a->s_nz;
1804   c->s_maxnz            = a->s_maxnz;
1805   c->solve_work         = 0;
1806   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1807   c->mult_work          = 0;
1808   *B = C;
1809   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 EXTERN_C_BEGIN
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1816 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1817 {
1818   Mat_SeqSBAIJ *a;
1819   Mat          B;
1820   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1821   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1822   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1823   int          *masked,nmask,tmp,bs2,ishift;
1824   PetscScalar  *aa;
1825   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1829   bs2  = bs*bs;
1830 
1831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1832   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1833   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1834   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1835   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1836   M = header[1]; N = header[2]; nz = header[3];
1837 
1838   if (header[3] < 0) {
1839     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1840   }
1841 
1842   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1843 
1844   /*
1845      This code adds extra rows to make sure the number of rows is
1846     divisible by the blocksize
1847   */
1848   mbs        = M/bs;
1849   extra_rows = bs - M + bs*(mbs);
1850   if (extra_rows == bs) extra_rows = 0;
1851   else                  mbs++;
1852   if (extra_rows) {
1853     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1854   }
1855 
1856   /* read in row lengths */
1857   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1858   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1859   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1860 
1861   /* read in column indices */
1862   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1863   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1864   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1865 
1866   /* loop over row lengths determining block row lengths */
1867   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1868   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1869   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1870   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1871   masked   = mask + mbs;
1872   rowcount = 0; nzcount = 0;
1873   for (i=0; i<mbs; i++) {
1874     nmask = 0;
1875     for (j=0; j<bs; j++) {
1876       kmax = rowlengths[rowcount];
1877       for (k=0; k<kmax; k++) {
1878         tmp = jj[nzcount++]/bs;   /* block col. index */
1879         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1880       }
1881       rowcount++;
1882     }
1883     s_browlengths[i] += nmask;
1884 
1885     /* zero out the mask elements we set */
1886     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1887   }
1888 
1889   /* create our matrix */
1890   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1891   ierr = MatSetType(B,type);CHKERRQ(ierr);
1892   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1893   a = (Mat_SeqSBAIJ*)B->data;
1894 
1895   /* set matrix "i" values */
1896   a->i[0] = 0;
1897   for (i=1; i<= mbs; i++) {
1898     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1899     a->ilen[i-1] = s_browlengths[i-1];
1900   }
1901   a->s_nz = a->i[mbs];
1902 
1903   /* read in nonzero values */
1904   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1905   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1906   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1907 
1908   /* set "a" and "j" values into matrix */
1909   nzcount = 0; jcount = 0;
1910   for (i=0; i<mbs; i++) {
1911     nzcountb = nzcount;
1912     nmask    = 0;
1913     for (j=0; j<bs; j++) {
1914       kmax = rowlengths[i*bs+j];
1915       for (k=0; k<kmax; k++) {
1916         tmp = jj[nzcount++]/bs; /* block col. index */
1917         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1918       }
1919     }
1920     /* sort the masked values */
1921     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1922 
1923     /* set "j" values into matrix */
1924     maskcount = 1;
1925     for (j=0; j<nmask; j++) {
1926       a->j[jcount++]  = masked[j];
1927       mask[masked[j]] = maskcount++;
1928     }
1929 
1930     /* set "a" values into matrix */
1931     ishift = bs2*a->i[i];
1932     for (j=0; j<bs; j++) {
1933       kmax = rowlengths[i*bs+j];
1934       for (k=0; k<kmax; k++) {
1935         tmp       = jj[nzcountb]/bs ; /* block col. index */
1936         if (tmp >= i){
1937           block     = mask[tmp] - 1;
1938           point     = jj[nzcountb] - bs*tmp;
1939           idx       = ishift + bs2*block + j + bs*point;
1940           a->a[idx] = aa[nzcountb];
1941         }
1942         nzcountb++;
1943       }
1944     }
1945     /* zero out the mask elements we set */
1946     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1947   }
1948   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1949 
1950   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1952   ierr = PetscFree(aa);CHKERRQ(ierr);
1953   ierr = PetscFree(jj);CHKERRQ(ierr);
1954   ierr = PetscFree(mask);CHKERRQ(ierr);
1955 
1956   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatView_Private(B);CHKERRQ(ierr);
1959   *A = B;
1960   PetscFunctionReturn(0);
1961 }
1962 EXTERN_C_END
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1966 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1967 {
1968   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1969   MatScalar    *aa=a->a,*v,*v1;
1970   PetscScalar  *x,*b,*t,sum,d;
1971   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1972   int          nz,nz1,*vj,*vj1,i;
1973 
1974   PetscFunctionBegin;
1975   its = its*lits;
1976   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1977 
1978   if (bs > 1)
1979     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1980 
1981   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1982   if (xx != bb) {
1983     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1984   } else {
1985     b = x;
1986   }
1987 
1988   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1989 
1990   if (flag & SOR_ZERO_INITIAL_GUESS) {
1991     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1992       for (i=0; i<m; i++)
1993         t[i] = b[i];
1994 
1995       for (i=0; i<m; i++){
1996         d  = *(aa + ai[i]);  /* diag[i] */
1997         v  = aa + ai[i] + 1;
1998         vj = aj + ai[i] + 1;
1999         nz = ai[i+1] - ai[i] - 1;
2000         x[i] = omega*t[i]/d;
2001         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2002         PetscLogFlops(2*nz-1);
2003       }
2004     }
2005 
2006     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2007       for (i=0; i<m; i++)
2008         t[i] = b[i];
2009 
2010       for (i=0; i<m-1; i++){  /* update rhs */
2011         v  = aa + ai[i] + 1;
2012         vj = aj + ai[i] + 1;
2013         nz = ai[i+1] - ai[i] - 1;
2014         while (nz--) t[*vj++] -= x[i]*(*v++);
2015         PetscLogFlops(2*nz-1);
2016       }
2017       for (i=m-1; i>=0; i--){
2018         d  = *(aa + ai[i]);
2019         v  = aa + ai[i] + 1;
2020         vj = aj + ai[i] + 1;
2021         nz = ai[i+1] - ai[i] - 1;
2022         sum = t[i];
2023         while (nz--) sum -= x[*vj++]*(*v++);
2024         PetscLogFlops(2*nz-1);
2025         x[i] =   (1-omega)*x[i] + omega*sum/d;
2026       }
2027     }
2028     its--;
2029   }
2030 
2031   while (its--) {
2032     /*
2033        forward sweep:
2034        for i=0,...,m-1:
2035          sum[i] = (b[i] - U(i,:)x )/d[i];
2036          x[i]   = (1-omega)x[i] + omega*sum[i];
2037          b      = b - x[i]*U^T(i,:);
2038 
2039     */
2040     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2041       for (i=0; i<m; i++)
2042         t[i] = b[i];
2043 
2044       for (i=0; i<m; i++){
2045         d  = *(aa + ai[i]);  /* diag[i] */
2046         v  = aa + ai[i] + 1; v1=v;
2047         vj = aj + ai[i] + 1; vj1=vj;
2048         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2049         sum = t[i];
2050         while (nz1--) sum -= (*v1++)*x[*vj1++];
2051         x[i] = (1-omega)*x[i] + omega*sum/d;
2052         while (nz--) t[*vj++] -= x[i]*(*v++);
2053         PetscLogFlops(4*nz-2);
2054       }
2055     }
2056 
2057   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2058       /*
2059        backward sweep:
2060        b = b - x[i]*U^T(i,:), i=0,...,n-2
2061        for i=m-1,...,0:
2062          sum[i] = (b[i] - U(i,:)x )/d[i];
2063          x[i]   = (1-omega)x[i] + omega*sum[i];
2064       */
2065       for (i=0; i<m; i++)
2066         t[i] = b[i];
2067 
2068       for (i=0; i<m-1; i++){  /* update rhs */
2069         v  = aa + ai[i] + 1;
2070         vj = aj + ai[i] + 1;
2071         nz = ai[i+1] - ai[i] - 1;
2072         while (nz--) t[*vj++] -= x[i]*(*v++);
2073         PetscLogFlops(2*nz-1);
2074       }
2075       for (i=m-1; i>=0; i--){
2076         d  = *(aa + ai[i]);
2077         v  = aa + ai[i] + 1;
2078         vj = aj + ai[i] + 1;
2079         nz = ai[i+1] - ai[i] - 1;
2080         sum = t[i];
2081         while (nz--) sum -= x[*vj++]*(*v++);
2082         PetscLogFlops(2*nz-1);
2083         x[i] =   (1-omega)*x[i] + omega*sum/d;
2084       }
2085     }
2086   }
2087 
2088   ierr = PetscFree(t); CHKERRQ(ierr);
2089   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2090   if (bb != xx) {
2091     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2092   }
2093 
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 
2098 
2099 
2100 
2101 
2102