xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision d34347eca8ac9657cccc548b3ba1c8201ddea9cf)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
560     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
565       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
595 {
596   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
597   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth      roworiented=a->roworiented;
601   const MatScalar *value = v;
602   MatScalar       *ap,*aa = a->a,*bap;
603 
604   PetscFunctionBegin;
605   if (roworiented) {
606     stepval = (n-1)*bs;
607   } else {
608     stepval = (m-1)*bs;
609   }
610   for (k=0; k<m; k++) { /* loop over added rows */
611     row  = im[k];
612     if (row < 0) continue;
613 #if defined(PETSC_USE_BOPT_g)
614     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
615 #endif
616     rp   = aj + ai[row];
617     ap   = aa + bs2*ai[row];
618     rmax = imax[row];
619     nrow = ailen[row];
620     low  = 0;
621     for (l=0; l<n; l++) { /* loop over added columns */
622       if (in[l] < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
625 #endif
626       col = in[l];
627       if (col < row) continue; /* ignore lower triangular block */
628       if (roworiented) {
629         value = v + k*(stepval+bs)*bs + l*bs;
630       } else {
631         value = v + l*(stepval+bs)*bs + k*bs;
632       }
633       if (!sorted) low = 0; high = nrow;
634       while (high-low > 7) {
635         t = (low+high)/2;
636         if (rp[t] > col) high = t;
637         else             low  = t;
638       }
639       for (i=low; i<high; i++) {
640         if (rp[i] > col) break;
641         if (rp[i] == col) {
642           bap  = ap +  bs2*i;
643           if (roworiented) {
644             if (is == ADD_VALUES) {
645               for (ii=0; ii<bs; ii++,value+=stepval) {
646                 for (jj=ii; jj<bs2; jj+=bs) {
647                   bap[jj] += *value++;
648                 }
649               }
650             } else {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=ii; jj<bs2; jj+=bs) {
653                   bap[jj] = *value++;
654                 }
655               }
656             }
657           } else {
658             if (is == ADD_VALUES) {
659               for (ii=0; ii<bs; ii++,value+=stepval) {
660                 for (jj=0; jj<bs; jj++) {
661                   *bap++ += *value++;
662                 }
663               }
664             } else {
665               for (ii=0; ii<bs; ii++,value+=stepval) {
666                 for (jj=0; jj<bs; jj++) {
667                   *bap++  = *value++;
668                 }
669               }
670             }
671           }
672           goto noinsert2;
673         }
674       }
675       if (nonew == 1) goto noinsert2;
676       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
677       if (nrow >= rmax) {
678         /* there is no extra room in row, therefore enlarge */
679         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
680         MatScalar *new_a;
681 
682         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
683 
684         /* malloc new storage space */
685         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
686 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
687         new_j   = (int*)(new_a + bs2*new_nz);
688         new_i   = new_j + new_nz;
689 
690         /* copy over old data into new slots */
691         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
692         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
693         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
694         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
695         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
698         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
699         /* free up old matrix storage */
700         ierr = PetscFree(a->a);CHKERRQ(ierr);
701         if (!a->singlemalloc) {
702           ierr = PetscFree(a->i);CHKERRQ(ierr);
703           ierr = PetscFree(a->j);CHKERRQ(ierr);
704         }
705         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
706         a->singlemalloc = PETSC_TRUE;
707 
708         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
709         rmax = imax[row] = imax[row] + CHUNKSIZE;
710         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
711         a->s_maxnz += bs2*CHUNKSIZE;
712         a->reallocs++;
713         a->s_nz++;
714       }
715       N = nrow++ - 1;
716       /* shift up all the later entries in this row */
717       for (ii=N; ii>=i; ii--) {
718         rp[ii+1] = rp[ii];
719         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
720       }
721       if (N >= i) {
722         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       rp[i] = col;
725       bap   = ap +  bs2*i;
726       if (roworiented) {
727         for (ii=0; ii<bs; ii++,value+=stepval) {
728           for (jj=ii; jj<bs2; jj+=bs) {
729             bap[jj] = *value++;
730           }
731         }
732       } else {
733         for (ii=0; ii<bs; ii++,value+=stepval) {
734           for (jj=0; jj<bs; jj++) {
735             *bap++  = *value++;
736           }
737         }
738       }
739       noinsert2:;
740       low = i;
741     }
742     ailen[row] = nrow;
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
750 {
751   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
753   int        m = A->m,*ip,N,*ailen = a->ilen;
754   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
755   MatScalar  *aa = a->a,*ap;
756 
757   PetscFunctionBegin;
758   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
759 
760   if (m) rmax = ailen[0];
761   for (i=1; i<mbs; i++) {
762     /* move each row back by the amount of empty slots (fshift) before it*/
763     fshift += imax[i-1] - ailen[i-1];
764     rmax   = PetscMax(rmax,ailen[i]);
765     if (fshift) {
766       ip = aj + ai[i]; ap = aa + bs2*ai[i];
767       N = ailen[i];
768       for (j=0; j<N; j++) {
769         ip[j-fshift] = ip[j];
770         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
771       }
772     }
773     ai[i] = ai[i-1] + ailen[i-1];
774   }
775   if (mbs) {
776     fshift += imax[mbs-1] - ailen[mbs-1];
777     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
778   }
779   /* reset ilen and imax for each row */
780   for (i=0; i<mbs; i++) {
781     ailen[i] = imax[i] = ai[i+1] - ai[i];
782   }
783   a->s_nz = ai[mbs];
784 
785   /* diagonals may have moved, reset it */
786   if (a->diag) {
787     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
788   }
789   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
790            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
791   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
792            a->reallocs);
793   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
794   a->reallocs          = 0;
795   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
796 
797   PetscFunctionReturn(0);
798 }
799 
800 /*
801    This function returns an array of flags which indicate the locations of contiguous
802    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
803    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
804    Assume: sizes should be long enough to hold all the values.
805 */
806 #undef __FUNCT__
807 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
808 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
809 {
810   int        i,j,k,row;
811   PetscTruth flg;
812 
813   PetscFunctionBegin;
814   for (i=0,j=0; i<n; j++) {
815     row = idx[i];
816     if (row%bs!=0) { /* Not the begining of a block */
817       sizes[j] = 1;
818       i++;
819     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
820       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
821       i++;
822     } else { /* Begining of the block, so check if the complete block exists */
823       flg = PETSC_TRUE;
824       for (k=1; k<bs; k++) {
825         if (row+k != idx[i+k]) { /* break in the block */
826           flg = PETSC_FALSE;
827           break;
828         }
829       }
830       if (flg == PETSC_TRUE) { /* No break in the bs */
831         sizes[j] = bs;
832         i+= bs;
833       } else {
834         sizes[j] = 1;
835         i++;
836       }
837     }
838   }
839   *bs_max = j;
840   PetscFunctionReturn(0);
841 }
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
845 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
846 {
847   PetscFunctionBegin;
848   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
849 }
850 
851 /* Only add/insert a(i,j) with i<=j (blocks).
852    Any a(i,j) with i>j input by user is ingored.
853 */
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
857 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
858 {
859   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
860   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
861   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
862   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
863   int         ridx,cidx,bs2=a->bs2,ierr;
864   MatScalar   *ap,value,*aa=a->a,*bap;
865 
866   PetscFunctionBegin;
867 
868   for (k=0; k<m; k++) { /* loop over added rows */
869     row  = im[k];       /* row number */
870     brow = row/bs;      /* block row number */
871     if (row < 0) continue;
872 #if defined(PETSC_USE_BOPT_g)
873     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
874 #endif
875     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
876     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
877     rmax = imax[brow];  /* maximum space allocated for this row */
878     nrow = ailen[brow]; /* actual length of this row */
879     low  = 0;
880 
881     for (l=0; l<n; l++) { /* loop over added columns */
882       if (in[l] < 0) continue;
883 #if defined(PETSC_USE_BOPT_g)
884       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
885 #endif
886       col = in[l];
887       bcol = col/bs;              /* block col number */
888 
889       if (brow <= bcol){
890         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
891         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
892           /* element value a(k,l) */
893           if (roworiented) {
894             value = v[l + k*n];
895           } else {
896             value = v[k + l*m];
897           }
898 
899           /* move pointer bap to a(k,l) quickly and add/insert value */
900           if (!sorted) low = 0; high = nrow;
901           while (high-low > 7) {
902             t = (low+high)/2;
903             if (rp[t] > bcol) high = t;
904             else              low  = t;
905           }
906           for (i=low; i<high; i++) {
907             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
908             if (rp[i] > bcol) break;
909             if (rp[i] == bcol) {
910               bap  = ap +  bs2*i + bs*cidx + ridx;
911               if (is == ADD_VALUES) *bap += value;
912               else                  *bap  = value;
913               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
914               if (brow == bcol && ridx < cidx){
915                 bap  = ap +  bs2*i + bs*ridx + cidx;
916                 if (is == ADD_VALUES) *bap += value;
917                 else                  *bap  = value;
918               }
919               goto noinsert1;
920             }
921           }
922 
923       if (nonew == 1) goto noinsert1;
924       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
925       if (nrow >= rmax) {
926         /* there is no extra room in row, therefore enlarge */
927         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
928         MatScalar *new_a;
929 
930         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931 
932         /* Malloc new storage space */
933         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
934         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
935         new_j = (int*)(new_a + bs2*new_nz);
936         new_i = new_j + new_nz;
937 
938         /* copy over old data into new slots */
939         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
940         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
941         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
942         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
943         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
944         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
945         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
946         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
947         /* free up old matrix storage */
948         ierr = PetscFree(a->a);CHKERRQ(ierr);
949         if (!a->singlemalloc) {
950           ierr = PetscFree(a->i);CHKERRQ(ierr);
951           ierr = PetscFree(a->j);CHKERRQ(ierr);
952         }
953         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
954         a->singlemalloc = PETSC_TRUE;
955 
956         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
957         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
958         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
959         a->s_maxnz += bs2*CHUNKSIZE;
960         a->reallocs++;
961         a->s_nz++;
962       }
963 
964       N = nrow++ - 1;
965       /* shift up all the later entries in this row */
966       for (ii=N; ii>=i; ii--) {
967         rp[ii+1] = rp[ii];
968         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
969       }
970       if (N>=i) {
971         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
972       }
973       rp[i]                      = bcol;
974       ap[bs2*i + bs*cidx + ridx] = value;
975       noinsert1:;
976       low = i;
977       /* } */
978         }
979       } /* end of if .. if..  */
980     }                     /* end of loop over added columns */
981     ailen[brow] = nrow;
982   }                       /* end of loop over added rows */
983 
984   PetscFunctionReturn(0);
985 }
986 
987 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
988 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
989 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
990 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
991 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
992 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
993 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
994 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
995 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
996 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
997 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
998 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1000 extern int MatZeroEntries_SeqSBAIJ(Mat);
1001 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1002 
1003 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1004 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1005 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1006 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1007 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1008 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1009 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1011 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1012 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1013 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1014 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1015 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1016 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1018 
1019 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1020 
1021 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1022 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1023 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1024 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1025 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1029 
1030 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1031 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1032 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1038 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1039 
1040 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1041 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1042 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1043 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1048 
1049 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1050 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1051 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1052 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1057 
1058 #ifdef HAVE_MatICCFactor
1059 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1062 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1063 {
1064   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1065   Mat         outA;
1066   int         ierr;
1067   PetscTruth  row_identity,col_identity;
1068 
1069   PetscFunctionBegin;
1070   /*
1071   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1072   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1073   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1074   if (!row_identity || !col_identity) {
1075     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1076   }
1077   */
1078 
1079   outA          = inA;
1080   inA->factor   = FACTOR_CHOLESKY;
1081 
1082   if (!a->diag) {
1083     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1084   }
1085   /*
1086       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1087       for ILU(0) factorization with natural ordering
1088   */
1089   switch (a->bs) {
1090   case 1:
1091     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1092     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1093     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1094     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1095   case 2:
1096     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1097     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1098     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1099     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1100     break;
1101   case 3:
1102     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1103     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1104     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1105     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1106     break;
1107   case 4:
1108     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1109     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1110     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1111     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1112     break;
1113   case 5:
1114     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1115     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1116     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1117     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1118     break;
1119   case 6:
1120     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1121     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1122     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1123     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1124     break;
1125   case 7:
1126     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1127     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1128     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1129     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1130     break;
1131   default:
1132     a->row        = row;
1133     a->icol       = col;
1134     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1135     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1136 
1137     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1138     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1139     PetscLogObjectParent(inA,a->icol);
1140 
1141     if (!a->solve_work) {
1142       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1143       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1144     }
1145   }
1146 
1147   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 #endif
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1155 int MatPrintHelp_SeqSBAIJ(Mat A)
1156 {
1157   static PetscTruth called = PETSC_FALSE;
1158   MPI_Comm          comm = A->comm;
1159   int               ierr;
1160 
1161   PetscFunctionBegin;
1162   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1163   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1164   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 EXTERN_C_BEGIN
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1171 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1172 {
1173   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1174   int         i,nz,n;
1175 
1176   PetscFunctionBegin;
1177   nz = baij->s_maxnz;
1178   n  = mat->n;
1179   for (i=0; i<nz; i++) {
1180     baij->j[i] = indices[i];
1181   }
1182   baij->s_nz = nz;
1183   for (i=0; i<n; i++) {
1184     baij->ilen[i] = baij->imax[i];
1185   }
1186 
1187   PetscFunctionReturn(0);
1188 }
1189 EXTERN_C_END
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1193 /*@
1194     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1195        in the matrix.
1196 
1197   Input Parameters:
1198 +  mat     - the SeqSBAIJ matrix
1199 -  indices - the column indices
1200 
1201   Level: advanced
1202 
1203   Notes:
1204     This can be called if you have precomputed the nonzero structure of the
1205   matrix and want to provide it to the matrix object to improve the performance
1206   of the MatSetValues() operation.
1207 
1208     You MUST have set the correct numbers of nonzeros per row in the call to
1209   MatCreateSeqSBAIJ().
1210 
1211     MUST be called before any calls to MatSetValues()
1212 
1213 .seealso: MatCreateSeqSBAIJ
1214 @*/
1215 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1216 {
1217   int ierr,(*f)(Mat,int *);
1218 
1219   PetscFunctionBegin;
1220   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1221   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1222   if (f) {
1223     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1224   } else {
1225     SETERRQ(1,"Wrong type of matrix to set column indices");
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1232 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1233 {
1234   int        ierr;
1235 
1236   PetscFunctionBegin;
1237   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1243 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1244 {
1245   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1246   PetscFunctionBegin;
1247   *array = a->a;
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1253 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   PetscFunctionBegin;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #include "petscblaslapack.h"
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1262 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1263 {
1264   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1265   int          ierr,one=1,i,bs=y->bs,bs2,j;
1266 
1267   PetscFunctionBegin;
1268   if (str == SAME_NONZERO_PATTERN) {
1269     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1270   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1271     if (y->xtoy && y->XtoY != X) {
1272       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1273       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1274     }
1275     if (!y->xtoy) { /* get xtoy */
1276       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1277       y->XtoY = X;
1278     }
1279     bs2 = bs*bs;
1280     for (i=0; i<x->s_nz; i++) {
1281       j = 0;
1282       while (j < bs2){
1283         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1284         j++;
1285       }
1286     }
1287     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1288   } else {
1289     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 /* -------------------------------------------------------------------*/
1295 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1296        MatGetRow_SeqSBAIJ,
1297        MatRestoreRow_SeqSBAIJ,
1298        MatMult_SeqSBAIJ_N,
1299        MatMultAdd_SeqSBAIJ_N,
1300        MatMultTranspose_SeqSBAIJ,
1301        MatMultTransposeAdd_SeqSBAIJ,
1302        MatSolve_SeqSBAIJ_N,
1303        0,
1304        0,
1305        0,
1306        0,
1307        MatCholeskyFactor_SeqSBAIJ,
1308        MatRelax_SeqSBAIJ,
1309        MatTranspose_SeqSBAIJ,
1310        MatGetInfo_SeqSBAIJ,
1311        MatEqual_SeqSBAIJ,
1312        MatGetDiagonal_SeqSBAIJ,
1313        MatDiagonalScale_SeqSBAIJ,
1314        MatNorm_SeqSBAIJ,
1315        0,
1316        MatAssemblyEnd_SeqSBAIJ,
1317        0,
1318        MatSetOption_SeqSBAIJ,
1319        MatZeroEntries_SeqSBAIJ,
1320        MatZeroRows_SeqSBAIJ,
1321        0,
1322        0,
1323        MatCholeskyFactorSymbolic_SeqSBAIJ,
1324        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1325        MatSetUpPreallocation_SeqSBAIJ,
1326        0,
1327        MatICCFactorSymbolic_SeqSBAIJ,
1328        MatGetArray_SeqSBAIJ,
1329        MatRestoreArray_SeqSBAIJ,
1330        MatDuplicate_SeqSBAIJ,
1331        0,
1332        0,
1333        0,
1334        0,
1335        MatAXPY_SeqSBAIJ,
1336        MatGetSubMatrices_SeqSBAIJ,
1337        MatIncreaseOverlap_SeqSBAIJ,
1338        MatGetValues_SeqSBAIJ,
1339        0,
1340        MatPrintHelp_SeqSBAIJ,
1341        MatScale_SeqSBAIJ,
1342        0,
1343        0,
1344        0,
1345        MatGetBlockSize_SeqSBAIJ,
1346        MatGetRowIJ_SeqSBAIJ,
1347        MatRestoreRowIJ_SeqSBAIJ,
1348        0,
1349        0,
1350        0,
1351        0,
1352        0,
1353        0,
1354        MatSetValuesBlocked_SeqSBAIJ,
1355        MatGetSubMatrix_SeqSBAIJ,
1356        0,
1357        0,
1358        MatGetPetscMaps_Petsc,
1359        0,
1360        0,
1361        0,
1362        0,
1363        0,
1364        0,
1365        MatGetRowMax_SeqSBAIJ,
1366        0,
1367        0,
1368        0,
1369        0,
1370        0,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379 #if !defined(PETSC_USE_COMPLEX)
1380        MatGetInertia_SeqSBAIJ
1381 #else
1382        0
1383 #endif
1384 };
1385 
1386 EXTERN_C_BEGIN
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1389 int MatStoreValues_SeqSBAIJ(Mat mat)
1390 {
1391   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1392   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1393   int         ierr;
1394 
1395   PetscFunctionBegin;
1396   if (aij->nonew != 1) {
1397     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1398   }
1399 
1400   /* allocate space for values if not already there */
1401   if (!aij->saved_values) {
1402     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1403   }
1404 
1405   /* copy values over */
1406   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 EXTERN_C_END
1410 
1411 EXTERN_C_BEGIN
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1414 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1415 {
1416   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1417   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1418 
1419   PetscFunctionBegin;
1420   if (aij->nonew != 1) {
1421     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1422   }
1423   if (!aij->saved_values) {
1424     SETERRQ(1,"Must call MatStoreValues(A);first");
1425   }
1426 
1427   /* copy values over */
1428   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 EXTERN_C_END
1432 
1433 EXTERN_C_BEGIN
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1436 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1437 {
1438   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1439   int          i,len,ierr,mbs,bs2;
1440   PetscTruth   flg;
1441   int          s_nz;
1442 
1443   PetscFunctionBegin;
1444   B->preallocated = PETSC_TRUE;
1445   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1446   mbs  = B->m/bs;
1447   bs2  = bs*bs;
1448 
1449   if (mbs*bs != B->m) {
1450     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1451   }
1452 
1453   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1454   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1455   if (nnz) {
1456     for (i=0; i<mbs; i++) {
1457       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1458       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1459     }
1460   }
1461 
1462   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1463   if (!flg) {
1464     switch (bs) {
1465     case 1:
1466       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1467       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1468       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1469       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1470       B->ops->mult            = MatMult_SeqSBAIJ_1;
1471       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1472       break;
1473     case 2:
1474       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1475       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1476       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1477       B->ops->mult            = MatMult_SeqSBAIJ_2;
1478       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1479       break;
1480     case 3:
1481       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1482       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1483       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1484       B->ops->mult            = MatMult_SeqSBAIJ_3;
1485       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1486       break;
1487     case 4:
1488       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1489       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1490       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1491       B->ops->mult            = MatMult_SeqSBAIJ_4;
1492       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1493       break;
1494     case 5:
1495       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1496       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1497       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1498       B->ops->mult            = MatMult_SeqSBAIJ_5;
1499       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1500       break;
1501     case 6:
1502       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1503       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1504       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1505       B->ops->mult            = MatMult_SeqSBAIJ_6;
1506       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1507       break;
1508     case 7:
1509       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1510       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1511       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1512       B->ops->mult            = MatMult_SeqSBAIJ_7;
1513       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1514       break;
1515     }
1516   }
1517 
1518   b->mbs = mbs;
1519   b->nbs = mbs;
1520   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1521   if (!nnz) {
1522     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1523     else if (nz <= 0)        nz = 1;
1524     for (i=0; i<mbs; i++) {
1525       b->imax[i] = nz;
1526     }
1527     nz = nz*mbs; /* total nz */
1528   } else {
1529     nz = 0;
1530     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1531   }
1532   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1533   s_nz = nz;
1534 
1535   /* allocate the matrix space */
1536   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1537   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1538   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1539   b->j = (int*)(b->a + s_nz*bs2);
1540   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1541   b->i = b->j + s_nz;
1542   b->singlemalloc = PETSC_TRUE;
1543 
1544   /* pointer to beginning of each row */
1545   b->i[0] = 0;
1546   for (i=1; i<mbs+1; i++) {
1547     b->i[i] = b->i[i-1] + b->imax[i-1];
1548   }
1549 
1550   /* b->ilen will count nonzeros in each block row so far. */
1551   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1552   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1553   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1554 
1555   b->bs               = bs;
1556   b->bs2              = bs2;
1557   b->s_nz             = 0;
1558   b->s_maxnz          = s_nz*bs2;
1559 
1560   b->inew             = 0;
1561   b->jnew             = 0;
1562   b->anew             = 0;
1563   b->a2anew           = 0;
1564   b->permute          = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 EXTERN_C_END
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1572 int MatCreate_SeqSBAIJ(Mat B)
1573 {
1574   Mat_SeqSBAIJ *b;
1575   int          ierr,size;
1576 
1577   PetscFunctionBegin;
1578   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1579   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1580   B->m = B->M = PetscMax(B->m,B->M);
1581   B->n = B->N = PetscMax(B->n,B->N);
1582 
1583   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1584   B->data = (void*)b;
1585   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1586   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1587   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1588   B->ops->view        = MatView_SeqSBAIJ;
1589   B->factor           = 0;
1590   B->lupivotthreshold = 1.0;
1591   B->mapping          = 0;
1592   b->row              = 0;
1593   b->icol             = 0;
1594   b->reallocs         = 0;
1595   b->saved_values     = 0;
1596 
1597   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1598   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1599 
1600   b->sorted           = PETSC_FALSE;
1601   b->roworiented      = PETSC_TRUE;
1602   b->nonew            = 0;
1603   b->diag             = 0;
1604   b->solve_work       = 0;
1605   b->mult_work        = 0;
1606   B->spptr            = 0;
1607   b->keepzeroedrows   = PETSC_FALSE;
1608   b->xtoy             = 0;
1609   b->XtoY             = 0;
1610 
1611   b->inew             = 0;
1612   b->jnew             = 0;
1613   b->anew             = 0;
1614   b->a2anew           = 0;
1615   b->permute          = PETSC_FALSE;
1616 
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1618                                      "MatStoreValues_SeqSBAIJ",
1619                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1621                                      "MatRetrieveValues_SeqSBAIJ",
1622                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1624                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1625                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1627                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1628                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 EXTERN_C_END
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1635 /*@C
1636    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1637    compressed row) format.  For good matrix assembly performance the
1638    user should preallocate the matrix storage by setting the parameter nz
1639    (or the array nnz).  By setting these parameters accurately, performance
1640    during matrix assembly can be increased by more than a factor of 50.
1641 
1642    Collective on Mat
1643 
1644    Input Parameters:
1645 +  A - the symmetric matrix
1646 .  bs - size of block
1647 .  nz - number of block nonzeros per block row (same for all rows)
1648 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1649          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1650 
1651    Options Database Keys:
1652 .   -mat_no_unroll - uses code that does not unroll the loops in the
1653                      block calculations (much slower)
1654 .    -mat_block_size - size of the blocks to use
1655 
1656    Level: intermediate
1657 
1658    Notes:
1659    Specify the preallocated storage with either nz or nnz (not both).
1660    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1661    allocation.  For additional details, see the users manual chapter on
1662    matrices.
1663 
1664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1665 @*/
1666 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1667   int ierr,(*f)(Mat,int,int,const int[]);
1668 
1669   PetscFunctionBegin;
1670   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1671   if (f) {
1672     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1673   }
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatCreateSeqSBAIJ"
1679 /*@C
1680    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1681    compressed row) format.  For good matrix assembly performance the
1682    user should preallocate the matrix storage by setting the parameter nz
1683    (or the array nnz).  By setting these parameters accurately, performance
1684    during matrix assembly can be increased by more than a factor of 50.
1685 
1686    Collective on MPI_Comm
1687 
1688    Input Parameters:
1689 +  comm - MPI communicator, set to PETSC_COMM_SELF
1690 .  bs - size of block
1691 .  m - number of rows, or number of columns
1692 .  nz - number of block nonzeros per block row (same for all rows)
1693 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1694          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1695 
1696    Output Parameter:
1697 .  A - the symmetric matrix
1698 
1699    Options Database Keys:
1700 .   -mat_no_unroll - uses code that does not unroll the loops in the
1701                      block calculations (much slower)
1702 .    -mat_block_size - size of the blocks to use
1703 
1704    Level: intermediate
1705 
1706    Notes:
1707 
1708    Specify the preallocated storage with either nz or nnz (not both).
1709    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1710    allocation.  For additional details, see the users manual chapter on
1711    matrices.
1712 
1713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1714 @*/
1715 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1716 {
1717   int ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1721   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1722   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1728 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1729 {
1730   Mat         C;
1731   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1732   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1733 
1734   PetscFunctionBegin;
1735   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1736 
1737   *B = 0;
1738   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1739   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1740   c    = (Mat_SeqSBAIJ*)C->data;
1741 
1742   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1743   C->preallocated   = PETSC_TRUE;
1744   C->factor         = A->factor;
1745   c->row            = 0;
1746   c->icol           = 0;
1747   c->saved_values   = 0;
1748   c->keepzeroedrows = a->keepzeroedrows;
1749   C->assembled      = PETSC_TRUE;
1750 
1751   c->bs         = a->bs;
1752   c->bs2        = a->bs2;
1753   c->mbs        = a->mbs;
1754   c->nbs        = a->nbs;
1755 
1756   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1757   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1758   for (i=0; i<mbs; i++) {
1759     c->imax[i] = a->imax[i];
1760     c->ilen[i] = a->ilen[i];
1761   }
1762 
1763   /* allocate the matrix space */
1764   c->singlemalloc = PETSC_TRUE;
1765   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1766   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1767   c->j = (int*)(c->a + nz*bs2);
1768   c->i = c->j + nz;
1769   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1770   if (mbs > 0) {
1771     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1772     if (cpvalues == MAT_COPY_VALUES) {
1773       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1774     } else {
1775       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1776     }
1777   }
1778 
1779   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1780   c->sorted      = a->sorted;
1781   c->roworiented = a->roworiented;
1782   c->nonew       = a->nonew;
1783 
1784   if (a->diag) {
1785     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1786     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1787     for (i=0; i<mbs; i++) {
1788       c->diag[i] = a->diag[i];
1789     }
1790   } else c->diag        = 0;
1791   c->s_nz               = a->s_nz;
1792   c->s_maxnz            = a->s_maxnz;
1793   c->solve_work         = 0;
1794   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1795   c->mult_work          = 0;
1796   *B = C;
1797   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1804 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1805 {
1806   Mat_SeqSBAIJ *a;
1807   Mat          B;
1808   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1809   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1810   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1811   int          *masked,nmask,tmp,bs2,ishift;
1812   PetscScalar  *aa;
1813   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1817   bs2  = bs*bs;
1818 
1819   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1820   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1821   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1822   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1823   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1824   M = header[1]; N = header[2]; nz = header[3];
1825 
1826   if (header[3] < 0) {
1827     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1828   }
1829 
1830   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1831 
1832   /*
1833      This code adds extra rows to make sure the number of rows is
1834     divisible by the blocksize
1835   */
1836   mbs        = M/bs;
1837   extra_rows = bs - M + bs*(mbs);
1838   if (extra_rows == bs) extra_rows = 0;
1839   else                  mbs++;
1840   if (extra_rows) {
1841     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1842   }
1843 
1844   /* read in row lengths */
1845   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1846   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1847   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1848 
1849   /* read in column indices */
1850   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1851   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1852   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1853 
1854   /* loop over row lengths determining block row lengths */
1855   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1856   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1857   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1858   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1859   masked   = mask + mbs;
1860   rowcount = 0; nzcount = 0;
1861   for (i=0; i<mbs; i++) {
1862     nmask = 0;
1863     for (j=0; j<bs; j++) {
1864       kmax = rowlengths[rowcount];
1865       for (k=0; k<kmax; k++) {
1866         tmp = jj[nzcount++]/bs;   /* block col. index */
1867         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1868       }
1869       rowcount++;
1870     }
1871     s_browlengths[i] += nmask;
1872 
1873     /* zero out the mask elements we set */
1874     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1875   }
1876 
1877   /* create our matrix */
1878   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1879   ierr = MatSetType(B,type);CHKERRQ(ierr);
1880   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1881   a = (Mat_SeqSBAIJ*)B->data;
1882 
1883   /* set matrix "i" values */
1884   a->i[0] = 0;
1885   for (i=1; i<= mbs; i++) {
1886     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1887     a->ilen[i-1] = s_browlengths[i-1];
1888   }
1889   a->s_nz = a->i[mbs];
1890 
1891   /* read in nonzero values */
1892   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1893   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1894   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1895 
1896   /* set "a" and "j" values into matrix */
1897   nzcount = 0; jcount = 0;
1898   for (i=0; i<mbs; i++) {
1899     nzcountb = nzcount;
1900     nmask    = 0;
1901     for (j=0; j<bs; j++) {
1902       kmax = rowlengths[i*bs+j];
1903       for (k=0; k<kmax; k++) {
1904         tmp = jj[nzcount++]/bs; /* block col. index */
1905         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1906       }
1907     }
1908     /* sort the masked values */
1909     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1910 
1911     /* set "j" values into matrix */
1912     maskcount = 1;
1913     for (j=0; j<nmask; j++) {
1914       a->j[jcount++]  = masked[j];
1915       mask[masked[j]] = maskcount++;
1916     }
1917 
1918     /* set "a" values into matrix */
1919     ishift = bs2*a->i[i];
1920     for (j=0; j<bs; j++) {
1921       kmax = rowlengths[i*bs+j];
1922       for (k=0; k<kmax; k++) {
1923         tmp       = jj[nzcountb]/bs ; /* block col. index */
1924         if (tmp >= i){
1925           block     = mask[tmp] - 1;
1926           point     = jj[nzcountb] - bs*tmp;
1927           idx       = ishift + bs2*block + j + bs*point;
1928           a->a[idx] = aa[nzcountb];
1929         }
1930         nzcountb++;
1931       }
1932     }
1933     /* zero out the mask elements we set */
1934     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1935   }
1936   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1937 
1938   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1939   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1940   ierr = PetscFree(aa);CHKERRQ(ierr);
1941   ierr = PetscFree(jj);CHKERRQ(ierr);
1942   ierr = PetscFree(mask);CHKERRQ(ierr);
1943 
1944   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1945   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1946   ierr = MatView_Private(B);CHKERRQ(ierr);
1947   *A = B;
1948   PetscFunctionReturn(0);
1949 }
1950 EXTERN_C_END
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1954 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1955 {
1956   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1957   MatScalar    *aa=a->a,*v,*v1;
1958   PetscScalar  *x,*b,*t,sum,d;
1959   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1960   int          nz,nz1,*vj,*vj1,i;
1961 
1962   PetscFunctionBegin;
1963   its = its*lits;
1964   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1965 
1966   if (bs > 1)
1967     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1968 
1969   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1970   if (xx != bb) {
1971     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1972   } else {
1973     b = x;
1974   }
1975 
1976   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1977 
1978   if (flag & SOR_ZERO_INITIAL_GUESS) {
1979     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1980       for (i=0; i<m; i++)
1981         t[i] = b[i];
1982 
1983       for (i=0; i<m; i++){
1984         d  = *(aa + ai[i]);  /* diag[i] */
1985         v  = aa + ai[i] + 1;
1986         vj = aj + ai[i] + 1;
1987         nz = ai[i+1] - ai[i] - 1;
1988         x[i] = omega*t[i]/d;
1989         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1990         PetscLogFlops(2*nz-1);
1991       }
1992     }
1993 
1994     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1995       for (i=0; i<m; i++)
1996         t[i] = b[i];
1997 
1998       for (i=0; i<m-1; i++){  /* update rhs */
1999         v  = aa + ai[i] + 1;
2000         vj = aj + ai[i] + 1;
2001         nz = ai[i+1] - ai[i] - 1;
2002         while (nz--) t[*vj++] -= x[i]*(*v++);
2003         PetscLogFlops(2*nz-1);
2004       }
2005       for (i=m-1; i>=0; i--){
2006         d  = *(aa + ai[i]);
2007         v  = aa + ai[i] + 1;
2008         vj = aj + ai[i] + 1;
2009         nz = ai[i+1] - ai[i] - 1;
2010         sum = t[i];
2011         while (nz--) sum -= x[*vj++]*(*v++);
2012         PetscLogFlops(2*nz-1);
2013         x[i] =   (1-omega)*x[i] + omega*sum/d;
2014       }
2015     }
2016     its--;
2017   }
2018 
2019   while (its--) {
2020     /*
2021        forward sweep:
2022        for i=0,...,m-1:
2023          sum[i] = (b[i] - U(i,:)x )/d[i];
2024          x[i]   = (1-omega)x[i] + omega*sum[i];
2025          b      = b - x[i]*U^T(i,:);
2026 
2027     */
2028     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2029       for (i=0; i<m; i++)
2030         t[i] = b[i];
2031 
2032       for (i=0; i<m; i++){
2033         d  = *(aa + ai[i]);  /* diag[i] */
2034         v  = aa + ai[i] + 1; v1=v;
2035         vj = aj + ai[i] + 1; vj1=vj;
2036         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2037         sum = t[i];
2038         while (nz1--) sum -= (*v1++)*x[*vj1++];
2039         x[i] = (1-omega)*x[i] + omega*sum/d;
2040         while (nz--) t[*vj++] -= x[i]*(*v++);
2041         PetscLogFlops(4*nz-2);
2042       }
2043     }
2044 
2045   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2046       /*
2047        backward sweep:
2048        b = b - x[i]*U^T(i,:), i=0,...,n-2
2049        for i=m-1,...,0:
2050          sum[i] = (b[i] - U(i,:)x )/d[i];
2051          x[i]   = (1-omega)x[i] + omega*sum[i];
2052       */
2053       for (i=0; i<m; i++)
2054         t[i] = b[i];
2055 
2056       for (i=0; i<m-1; i++){  /* update rhs */
2057         v  = aa + ai[i] + 1;
2058         vj = aj + ai[i] + 1;
2059         nz = ai[i+1] - ai[i] - 1;
2060         while (nz--) t[*vj++] -= x[i]*(*v++);
2061         PetscLogFlops(2*nz-1);
2062       }
2063       for (i=m-1; i>=0; i--){
2064         d  = *(aa + ai[i]);
2065         v  = aa + ai[i] + 1;
2066         vj = aj + ai[i] + 1;
2067         nz = ai[i+1] - ai[i] - 1;
2068         sum = t[i];
2069         while (nz--) sum -= x[*vj++]*(*v++);
2070         PetscLogFlops(2*nz-1);
2071         x[i] =   (1-omega)*x[i] + omega*sum/d;
2072       }
2073     }
2074   }
2075 
2076   ierr = PetscFree(t); CHKERRQ(ierr);
2077   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2078   if (bb != xx) {
2079     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2080   }
2081 
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 
2086 
2087 
2088 
2089 
2090