xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 4e7234bf172ce7019c4948834d73bd3002d026ba)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
560     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
565       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
595 {
596   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
597   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth      roworiented=a->roworiented;
601   const MatScalar *value = v;
602   MatScalar       *ap,*aa = a->a,*bap;
603 
604   PetscFunctionBegin;
605   if (roworiented) {
606     stepval = (n-1)*bs;
607   } else {
608     stepval = (m-1)*bs;
609   }
610   for (k=0; k<m; k++) { /* loop over added rows */
611     row  = im[k];
612     if (row < 0) continue;
613 #if defined(PETSC_USE_BOPT_g)
614     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
615 #endif
616     rp   = aj + ai[row];
617     ap   = aa + bs2*ai[row];
618     rmax = imax[row];
619     nrow = ailen[row];
620     low  = 0;
621     for (l=0; l<n; l++) { /* loop over added columns */
622       if (in[l] < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
625 #endif
626       col = in[l];
627       if (col < row) continue; /* ignore lower triangular block */
628       if (roworiented) {
629         value = v + k*(stepval+bs)*bs + l*bs;
630       } else {
631         value = v + l*(stepval+bs)*bs + k*bs;
632       }
633       if (!sorted) low = 0; high = nrow;
634       while (high-low > 7) {
635         t = (low+high)/2;
636         if (rp[t] > col) high = t;
637         else             low  = t;
638       }
639       for (i=low; i<high; i++) {
640         if (rp[i] > col) break;
641         if (rp[i] == col) {
642           bap  = ap +  bs2*i;
643           if (roworiented) {
644             if (is == ADD_VALUES) {
645               for (ii=0; ii<bs; ii++,value+=stepval) {
646                 for (jj=ii; jj<bs2; jj+=bs) {
647                   bap[jj] += *value++;
648                 }
649               }
650             } else {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=ii; jj<bs2; jj+=bs) {
653                   bap[jj] = *value++;
654                 }
655               }
656             }
657           } else {
658             if (is == ADD_VALUES) {
659               for (ii=0; ii<bs; ii++,value+=stepval) {
660                 for (jj=0; jj<bs; jj++) {
661                   *bap++ += *value++;
662                 }
663               }
664             } else {
665               for (ii=0; ii<bs; ii++,value+=stepval) {
666                 for (jj=0; jj<bs; jj++) {
667                   *bap++  = *value++;
668                 }
669               }
670             }
671           }
672           goto noinsert2;
673         }
674       }
675       if (nonew == 1) goto noinsert2;
676       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
677       if (nrow >= rmax) {
678         /* there is no extra room in row, therefore enlarge */
679         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
680         MatScalar *new_a;
681 
682         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
683 
684         /* malloc new storage space */
685         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
686 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
687         new_j   = (int*)(new_a + bs2*new_nz);
688         new_i   = new_j + new_nz;
689 
690         /* copy over old data into new slots */
691         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
692         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
693         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
694         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
695         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
698         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
699         /* free up old matrix storage */
700         ierr = PetscFree(a->a);CHKERRQ(ierr);
701         if (!a->singlemalloc) {
702           ierr = PetscFree(a->i);CHKERRQ(ierr);
703           ierr = PetscFree(a->j);CHKERRQ(ierr);
704         }
705         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
706         a->singlemalloc = PETSC_TRUE;
707 
708         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
709         rmax = imax[row] = imax[row] + CHUNKSIZE;
710         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
711         a->s_maxnz += bs2*CHUNKSIZE;
712         a->reallocs++;
713         a->s_nz++;
714       }
715       N = nrow++ - 1;
716       /* shift up all the later entries in this row */
717       for (ii=N; ii>=i; ii--) {
718         rp[ii+1] = rp[ii];
719         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
720       }
721       if (N >= i) {
722         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       rp[i] = col;
725       bap   = ap +  bs2*i;
726       if (roworiented) {
727         for (ii=0; ii<bs; ii++,value+=stepval) {
728           for (jj=ii; jj<bs2; jj+=bs) {
729             bap[jj] = *value++;
730           }
731         }
732       } else {
733         for (ii=0; ii<bs; ii++,value+=stepval) {
734           for (jj=0; jj<bs; jj++) {
735             *bap++  = *value++;
736           }
737         }
738       }
739       noinsert2:;
740       low = i;
741     }
742     ailen[row] = nrow;
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
750 {
751   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
753   int        m = A->m,*ip,N,*ailen = a->ilen;
754   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
755   MatScalar  *aa = a->a,*ap;
756 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
757   PetscTruth   flag;
758 #endif
759 
760   PetscFunctionBegin;
761   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
762 
763   if (m) rmax = ailen[0];
764   for (i=1; i<mbs; i++) {
765     /* move each row back by the amount of empty slots (fshift) before it*/
766     fshift += imax[i-1] - ailen[i-1];
767     rmax   = PetscMax(rmax,ailen[i]);
768     if (fshift) {
769       ip = aj + ai[i]; ap = aa + bs2*ai[i];
770       N = ailen[i];
771       for (j=0; j<N; j++) {
772         ip[j-fshift] = ip[j];
773         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
774       }
775     }
776     ai[i] = ai[i-1] + ailen[i-1];
777   }
778   if (mbs) {
779     fshift += imax[mbs-1] - ailen[mbs-1];
780     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
781   }
782   /* reset ilen and imax for each row */
783   for (i=0; i<mbs; i++) {
784     ailen[i] = imax[i] = ai[i+1] - ai[i];
785   }
786   a->s_nz = ai[mbs];
787 
788   /* diagonals may have moved, reset it */
789   if (a->diag) {
790     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
791   }
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
793            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
795            a->reallocs);
796   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
797   a->reallocs          = 0;
798   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
799 
800 #if defined(PETSC_HAVE_SPOOLES)
801   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
802   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
803 #endif
804 #if defined(PETSC_HAVE_MUMPS)
805   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
806   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
807 #endif
808 
809   PetscFunctionReturn(0);
810 }
811 
812 /*
813    This function returns an array of flags which indicate the locations of contiguous
814    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
815    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
816    Assume: sizes should be long enough to hold all the values.
817 */
818 #undef __FUNCT__
819 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
820 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
821 {
822   int        i,j,k,row;
823   PetscTruth flg;
824 
825   PetscFunctionBegin;
826   for (i=0,j=0; i<n; j++) {
827     row = idx[i];
828     if (row%bs!=0) { /* Not the begining of a block */
829       sizes[j] = 1;
830       i++;
831     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
832       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
833       i++;
834     } else { /* Begining of the block, so check if the complete block exists */
835       flg = PETSC_TRUE;
836       for (k=1; k<bs; k++) {
837         if (row+k != idx[i+k]) { /* break in the block */
838           flg = PETSC_FALSE;
839           break;
840         }
841       }
842       if (flg == PETSC_TRUE) { /* No break in the bs */
843         sizes[j] = bs;
844         i+= bs;
845       } else {
846         sizes[j] = 1;
847         i++;
848       }
849     }
850   }
851   *bs_max = j;
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
857 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
858 {
859   PetscFunctionBegin;
860   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
861 }
862 
863 /* Only add/insert a(i,j) with i<=j (blocks).
864    Any a(i,j) with i>j input by user is ingored.
865 */
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
869 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
870 {
871   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
872   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
873   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
874   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
875   int         ridx,cidx,bs2=a->bs2,ierr;
876   MatScalar   *ap,value,*aa=a->a,*bap;
877 
878   PetscFunctionBegin;
879 
880   for (k=0; k<m; k++) { /* loop over added rows */
881     row  = im[k];       /* row number */
882     brow = row/bs;      /* block row number */
883     if (row < 0) continue;
884 #if defined(PETSC_USE_BOPT_g)
885     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
886 #endif
887     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
888     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
889     rmax = imax[brow];  /* maximum space allocated for this row */
890     nrow = ailen[brow]; /* actual length of this row */
891     low  = 0;
892 
893     for (l=0; l<n; l++) { /* loop over added columns */
894       if (in[l] < 0) continue;
895 #if defined(PETSC_USE_BOPT_g)
896       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
897 #endif
898       col = in[l];
899       bcol = col/bs;              /* block col number */
900 
901       if (brow <= bcol){
902         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
903         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
904           /* element value a(k,l) */
905           if (roworiented) {
906             value = v[l + k*n];
907           } else {
908             value = v[k + l*m];
909           }
910 
911           /* move pointer bap to a(k,l) quickly and add/insert value */
912           if (!sorted) low = 0; high = nrow;
913           while (high-low > 7) {
914             t = (low+high)/2;
915             if (rp[t] > bcol) high = t;
916             else              low  = t;
917           }
918           for (i=low; i<high; i++) {
919             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
920             if (rp[i] > bcol) break;
921             if (rp[i] == bcol) {
922               bap  = ap +  bs2*i + bs*cidx + ridx;
923               if (is == ADD_VALUES) *bap += value;
924               else                  *bap  = value;
925               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
926               if (brow == bcol && ridx < cidx){
927                 bap  = ap +  bs2*i + bs*ridx + cidx;
928                 if (is == ADD_VALUES) *bap += value;
929                 else                  *bap  = value;
930               }
931               goto noinsert1;
932             }
933           }
934 
935       if (nonew == 1) goto noinsert1;
936       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
937       if (nrow >= rmax) {
938         /* there is no extra room in row, therefore enlarge */
939         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
940         MatScalar *new_a;
941 
942         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
943 
944         /* Malloc new storage space */
945         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
946         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
947         new_j = (int*)(new_a + bs2*new_nz);
948         new_i = new_j + new_nz;
949 
950         /* copy over old data into new slots */
951         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
952         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
953         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
954         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
955         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
956         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
957         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
958         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
959         /* free up old matrix storage */
960         ierr = PetscFree(a->a);CHKERRQ(ierr);
961         if (!a->singlemalloc) {
962           ierr = PetscFree(a->i);CHKERRQ(ierr);
963           ierr = PetscFree(a->j);CHKERRQ(ierr);
964         }
965         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
966         a->singlemalloc = PETSC_TRUE;
967 
968         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
969         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
970         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
971         a->s_maxnz += bs2*CHUNKSIZE;
972         a->reallocs++;
973         a->s_nz++;
974       }
975 
976       N = nrow++ - 1;
977       /* shift up all the later entries in this row */
978       for (ii=N; ii>=i; ii--) {
979         rp[ii+1] = rp[ii];
980         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
981       }
982       if (N>=i) {
983         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
984       }
985       rp[i]                      = bcol;
986       ap[bs2*i + bs*cidx + ridx] = value;
987       noinsert1:;
988       low = i;
989       /* } */
990         }
991       } /* end of if .. if..  */
992     }                     /* end of loop over added columns */
993     ailen[brow] = nrow;
994   }                       /* end of loop over added rows */
995 
996   PetscFunctionReturn(0);
997 }
998 
999 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
1000 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
1001 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
1002 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1003 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
1004 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1005 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1006 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
1007 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1008 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1009 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1010 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1011 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1012 extern int MatZeroEntries_SeqSBAIJ(Mat);
1013 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1014 
1015 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1021 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1022 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1028 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1029 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1030 
1031 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1032 
1033 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1038 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1039 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1040 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1041 
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1048 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1049 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1050 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1051 
1052 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1056 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1057 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1058 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1059 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1060 
1061 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1065 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1066 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1067 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1068 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1069 
1070 #ifdef HAVE_MatICCFactor
1071 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1074 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1075 {
1076   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1077   Mat         outA;
1078   int         ierr;
1079   PetscTruth  row_identity,col_identity;
1080 
1081   PetscFunctionBegin;
1082   /*
1083   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1084   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1085   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1086   if (!row_identity || !col_identity) {
1087     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1088   }
1089   */
1090 
1091   outA          = inA;
1092   inA->factor   = FACTOR_CHOLESKY;
1093 
1094   if (!a->diag) {
1095     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1096   }
1097   /*
1098       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1099       for ILU(0) factorization with natural ordering
1100   */
1101   switch (a->bs) {
1102   case 1:
1103     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1104     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1105     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1106     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1107   case 2:
1108     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1109     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1110     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1111     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1112     break;
1113   case 3:
1114     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1115     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1116     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1117     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1118     break;
1119   case 4:
1120     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1121     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1122     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1123     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1124     break;
1125   case 5:
1126     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1127     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1128     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1129     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1130     break;
1131   case 6:
1132     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1133     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1134     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1135     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1136     break;
1137   case 7:
1138     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1139     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1140     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1141     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1142     break;
1143   default:
1144     a->row        = row;
1145     a->icol       = col;
1146     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1147     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1148 
1149     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1150     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1151     PetscLogObjectParent(inA,a->icol);
1152 
1153     if (!a->solve_work) {
1154       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1155       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1156     }
1157   }
1158 
1159   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1160 
1161   PetscFunctionReturn(0);
1162 }
1163 #endif
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1167 int MatPrintHelp_SeqSBAIJ(Mat A)
1168 {
1169   static PetscTruth called = PETSC_FALSE;
1170   MPI_Comm          comm = A->comm;
1171   int               ierr;
1172 
1173   PetscFunctionBegin;
1174   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1175   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1176   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 EXTERN_C_BEGIN
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1183 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1184 {
1185   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1186   int         i,nz,n;
1187 
1188   PetscFunctionBegin;
1189   nz = baij->s_maxnz;
1190   n  = mat->n;
1191   for (i=0; i<nz; i++) {
1192     baij->j[i] = indices[i];
1193   }
1194   baij->s_nz = nz;
1195   for (i=0; i<n; i++) {
1196     baij->ilen[i] = baij->imax[i];
1197   }
1198 
1199   PetscFunctionReturn(0);
1200 }
1201 EXTERN_C_END
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1205 /*@
1206     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1207        in the matrix.
1208 
1209   Input Parameters:
1210 +  mat     - the SeqSBAIJ matrix
1211 -  indices - the column indices
1212 
1213   Level: advanced
1214 
1215   Notes:
1216     This can be called if you have precomputed the nonzero structure of the
1217   matrix and want to provide it to the matrix object to improve the performance
1218   of the MatSetValues() operation.
1219 
1220     You MUST have set the correct numbers of nonzeros per row in the call to
1221   MatCreateSeqSBAIJ().
1222 
1223     MUST be called before any calls to MatSetValues()
1224 
1225 .seealso: MatCreateSeqSBAIJ
1226 @*/
1227 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1228 {
1229   int ierr,(*f)(Mat,int *);
1230 
1231   PetscFunctionBegin;
1232   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1233   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1234   if (f) {
1235     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1236   } else {
1237     SETERRQ(1,"Wrong type of matrix to set column indices");
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1244 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1245 {
1246   int        ierr;
1247 
1248   PetscFunctionBegin;
1249   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1255 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1256 {
1257   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1258   PetscFunctionBegin;
1259   *array = a->a;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1265 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1266 {
1267   PetscFunctionBegin;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include "petscblaslapack.h"
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1274 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1275 {
1276   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1277   int          ierr,one=1,i,bs=y->bs,bs2,j;
1278 
1279   PetscFunctionBegin;
1280   if (str == SAME_NONZERO_PATTERN) {
1281     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1282   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1283     if (y->xtoy && y->XtoY != X) {
1284       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1285       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1286     }
1287     if (!y->xtoy) { /* get xtoy */
1288       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1289       y->XtoY = X;
1290     }
1291     bs2 = bs*bs;
1292     for (i=0; i<x->s_nz; i++) {
1293       j = 0;
1294       while (j < bs2){
1295         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1296         j++;
1297       }
1298     }
1299     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1300   } else {
1301     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 /* -------------------------------------------------------------------*/
1307 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1308        MatGetRow_SeqSBAIJ,
1309        MatRestoreRow_SeqSBAIJ,
1310        MatMult_SeqSBAIJ_N,
1311        MatMultAdd_SeqSBAIJ_N,
1312        MatMultTranspose_SeqSBAIJ,
1313        MatMultTransposeAdd_SeqSBAIJ,
1314        MatSolve_SeqSBAIJ_N,
1315        0,
1316        0,
1317        0,
1318        0,
1319        MatCholeskyFactor_SeqSBAIJ,
1320        MatRelax_SeqSBAIJ,
1321        MatTranspose_SeqSBAIJ,
1322        MatGetInfo_SeqSBAIJ,
1323        MatEqual_SeqSBAIJ,
1324        MatGetDiagonal_SeqSBAIJ,
1325        MatDiagonalScale_SeqSBAIJ,
1326        MatNorm_SeqSBAIJ,
1327        0,
1328        MatAssemblyEnd_SeqSBAIJ,
1329        0,
1330        MatSetOption_SeqSBAIJ,
1331        MatZeroEntries_SeqSBAIJ,
1332        MatZeroRows_SeqSBAIJ,
1333        0,
1334        0,
1335        MatCholeskyFactorSymbolic_SeqSBAIJ,
1336        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1337        MatSetUpPreallocation_SeqSBAIJ,
1338        0,
1339        MatICCFactorSymbolic_SeqSBAIJ,
1340        MatGetArray_SeqSBAIJ,
1341        MatRestoreArray_SeqSBAIJ,
1342        MatDuplicate_SeqSBAIJ,
1343        0,
1344        0,
1345        0,
1346        0,
1347        MatAXPY_SeqSBAIJ,
1348        MatGetSubMatrices_SeqSBAIJ,
1349        MatIncreaseOverlap_SeqSBAIJ,
1350        MatGetValues_SeqSBAIJ,
1351        0,
1352        MatPrintHelp_SeqSBAIJ,
1353        MatScale_SeqSBAIJ,
1354        0,
1355        0,
1356        0,
1357        MatGetBlockSize_SeqSBAIJ,
1358        MatGetRowIJ_SeqSBAIJ,
1359        MatRestoreRowIJ_SeqSBAIJ,
1360        0,
1361        0,
1362        0,
1363        0,
1364        0,
1365        0,
1366        MatSetValuesBlocked_SeqSBAIJ,
1367        MatGetSubMatrix_SeqSBAIJ,
1368        0,
1369        0,
1370        MatGetPetscMaps_Petsc,
1371        0,
1372        0,
1373        0,
1374        0,
1375        0,
1376        0,
1377        MatGetRowMax_SeqSBAIJ,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        0,
1391 #if !defined(PETSC_USE_COMPLEX)
1392        MatGetInertia_SeqSBAIJ
1393 #else
1394        0
1395 #endif
1396 };
1397 
1398 EXTERN_C_BEGIN
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1401 int MatStoreValues_SeqSBAIJ(Mat mat)
1402 {
1403   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1404   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1405   int         ierr;
1406 
1407   PetscFunctionBegin;
1408   if (aij->nonew != 1) {
1409     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1410   }
1411 
1412   /* allocate space for values if not already there */
1413   if (!aij->saved_values) {
1414     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1415   }
1416 
1417   /* copy values over */
1418   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 EXTERN_C_END
1422 
1423 EXTERN_C_BEGIN
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1426 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1427 {
1428   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1429   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1430 
1431   PetscFunctionBegin;
1432   if (aij->nonew != 1) {
1433     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1434   }
1435   if (!aij->saved_values) {
1436     SETERRQ(1,"Must call MatStoreValues(A);first");
1437   }
1438 
1439   /* copy values over */
1440   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 EXTERN_C_END
1444 
1445 EXTERN_C_BEGIN
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1448 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1449 {
1450   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1451   int          i,len,ierr,mbs,bs2;
1452   PetscTruth   flg;
1453   int          s_nz;
1454 
1455   PetscFunctionBegin;
1456   B->preallocated = PETSC_TRUE;
1457   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1458   mbs  = B->m/bs;
1459   bs2  = bs*bs;
1460 
1461   if (mbs*bs != B->m) {
1462     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1463   }
1464 
1465   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1466   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1467   if (nnz) {
1468     for (i=0; i<mbs; i++) {
1469       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1470       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1471     }
1472   }
1473 
1474   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1475   if (!flg) {
1476     switch (bs) {
1477     case 1:
1478       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1479       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1480       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1481       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1482       B->ops->mult            = MatMult_SeqSBAIJ_1;
1483       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1484       break;
1485     case 2:
1486       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1487       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1488       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1489       B->ops->mult            = MatMult_SeqSBAIJ_2;
1490       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1491       break;
1492     case 3:
1493       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1494       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1495       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1496       B->ops->mult            = MatMult_SeqSBAIJ_3;
1497       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1498       break;
1499     case 4:
1500       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1501       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1502       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1503       B->ops->mult            = MatMult_SeqSBAIJ_4;
1504       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1505       break;
1506     case 5:
1507       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1508       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1509       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1510       B->ops->mult            = MatMult_SeqSBAIJ_5;
1511       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1512       break;
1513     case 6:
1514       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1515       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1516       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1517       B->ops->mult            = MatMult_SeqSBAIJ_6;
1518       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1519       break;
1520     case 7:
1521       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1522       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1523       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1524       B->ops->mult            = MatMult_SeqSBAIJ_7;
1525       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1526       break;
1527     }
1528   }
1529 
1530   b->mbs = mbs;
1531   b->nbs = mbs;
1532   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1533   if (!nnz) {
1534     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1535     else if (nz <= 0)        nz = 1;
1536     for (i=0; i<mbs; i++) {
1537       b->imax[i] = nz;
1538     }
1539     nz = nz*mbs; /* total nz */
1540   } else {
1541     nz = 0;
1542     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1543   }
1544   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1545   s_nz = nz;
1546 
1547   /* allocate the matrix space */
1548   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1549   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1550   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1551   b->j = (int*)(b->a + s_nz*bs2);
1552   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1553   b->i = b->j + s_nz;
1554   b->singlemalloc = PETSC_TRUE;
1555 
1556   /* pointer to beginning of each row */
1557   b->i[0] = 0;
1558   for (i=1; i<mbs+1; i++) {
1559     b->i[i] = b->i[i-1] + b->imax[i-1];
1560   }
1561 
1562   /* b->ilen will count nonzeros in each block row so far. */
1563   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1564   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1565   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1566 
1567   b->bs               = bs;
1568   b->bs2              = bs2;
1569   b->s_nz             = 0;
1570   b->s_maxnz          = s_nz*bs2;
1571 
1572   b->inew             = 0;
1573   b->jnew             = 0;
1574   b->anew             = 0;
1575   b->a2anew           = 0;
1576   b->permute          = PETSC_FALSE;
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1584 int MatCreate_SeqSBAIJ(Mat B)
1585 {
1586   Mat_SeqSBAIJ *b;
1587   int          ierr,size;
1588 
1589   PetscFunctionBegin;
1590   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1591   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1592   B->m = B->M = PetscMax(B->m,B->M);
1593   B->n = B->N = PetscMax(B->n,B->N);
1594 
1595   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1596   B->data = (void*)b;
1597   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1598   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1599   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1600   B->ops->view        = MatView_SeqSBAIJ;
1601   B->factor           = 0;
1602   B->lupivotthreshold = 1.0;
1603   B->mapping          = 0;
1604   b->row              = 0;
1605   b->icol             = 0;
1606   b->reallocs         = 0;
1607   b->saved_values     = 0;
1608 
1609   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1610   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1611 
1612   b->sorted           = PETSC_FALSE;
1613   b->roworiented      = PETSC_TRUE;
1614   b->nonew            = 0;
1615   b->diag             = 0;
1616   b->solve_work       = 0;
1617   b->mult_work        = 0;
1618   B->spptr            = 0;
1619   b->keepzeroedrows   = PETSC_FALSE;
1620   b->xtoy             = 0;
1621   b->XtoY             = 0;
1622 
1623   b->inew             = 0;
1624   b->jnew             = 0;
1625   b->anew             = 0;
1626   b->a2anew           = 0;
1627   b->permute          = PETSC_FALSE;
1628 
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1630                                      "MatStoreValues_SeqSBAIJ",
1631                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1633                                      "MatRetrieveValues_SeqSBAIJ",
1634                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1635   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1636                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1637                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1639                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1640                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1641   PetscFunctionReturn(0);
1642 }
1643 EXTERN_C_END
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1647 /*@C
1648    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1649    compressed row) format.  For good matrix assembly performance the
1650    user should preallocate the matrix storage by setting the parameter nz
1651    (or the array nnz).  By setting these parameters accurately, performance
1652    during matrix assembly can be increased by more than a factor of 50.
1653 
1654    Collective on Mat
1655 
1656    Input Parameters:
1657 +  A - the symmetric matrix
1658 .  bs - size of block
1659 .  nz - number of block nonzeros per block row (same for all rows)
1660 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1661          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1662 
1663    Options Database Keys:
1664 .   -mat_no_unroll - uses code that does not unroll the loops in the
1665                      block calculations (much slower)
1666 .    -mat_block_size - size of the blocks to use
1667 
1668    Level: intermediate
1669 
1670    Notes:
1671    Specify the preallocated storage with either nz or nnz (not both).
1672    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1673    allocation.  For additional details, see the users manual chapter on
1674    matrices.
1675 
1676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1677 @*/
1678 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1679   int ierr,(*f)(Mat,int,int,const int[]);
1680 
1681   PetscFunctionBegin;
1682   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1683   if (f) {
1684     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatCreateSeqSBAIJ"
1691 /*@C
1692    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1693    compressed row) format.  For good matrix assembly performance the
1694    user should preallocate the matrix storage by setting the parameter nz
1695    (or the array nnz).  By setting these parameters accurately, performance
1696    during matrix assembly can be increased by more than a factor of 50.
1697 
1698    Collective on MPI_Comm
1699 
1700    Input Parameters:
1701 +  comm - MPI communicator, set to PETSC_COMM_SELF
1702 .  bs - size of block
1703 .  m - number of rows, or number of columns
1704 .  nz - number of block nonzeros per block row (same for all rows)
1705 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1706          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1707 
1708    Output Parameter:
1709 .  A - the symmetric matrix
1710 
1711    Options Database Keys:
1712 .   -mat_no_unroll - uses code that does not unroll the loops in the
1713                      block calculations (much slower)
1714 .    -mat_block_size - size of the blocks to use
1715 
1716    Level: intermediate
1717 
1718    Notes:
1719 
1720    Specify the preallocated storage with either nz or nnz (not both).
1721    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1722    allocation.  For additional details, see the users manual chapter on
1723    matrices.
1724 
1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1726 @*/
1727 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1728 {
1729   int ierr;
1730 
1731   PetscFunctionBegin;
1732   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1733   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1734   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1740 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1741 {
1742   Mat         C;
1743   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1744   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1745 
1746   PetscFunctionBegin;
1747   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1748 
1749   *B = 0;
1750   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1751   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1752   c    = (Mat_SeqSBAIJ*)C->data;
1753 
1754   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1755   C->preallocated   = PETSC_TRUE;
1756   C->factor         = A->factor;
1757   c->row            = 0;
1758   c->icol           = 0;
1759   c->saved_values   = 0;
1760   c->keepzeroedrows = a->keepzeroedrows;
1761   C->assembled      = PETSC_TRUE;
1762 
1763   c->bs         = a->bs;
1764   c->bs2        = a->bs2;
1765   c->mbs        = a->mbs;
1766   c->nbs        = a->nbs;
1767 
1768   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1769   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1770   for (i=0; i<mbs; i++) {
1771     c->imax[i] = a->imax[i];
1772     c->ilen[i] = a->ilen[i];
1773   }
1774 
1775   /* allocate the matrix space */
1776   c->singlemalloc = PETSC_TRUE;
1777   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1778   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1779   c->j = (int*)(c->a + nz*bs2);
1780   c->i = c->j + nz;
1781   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1782   if (mbs > 0) {
1783     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1784     if (cpvalues == MAT_COPY_VALUES) {
1785       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1786     } else {
1787       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1788     }
1789   }
1790 
1791   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1792   c->sorted      = a->sorted;
1793   c->roworiented = a->roworiented;
1794   c->nonew       = a->nonew;
1795 
1796   if (a->diag) {
1797     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1798     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1799     for (i=0; i<mbs; i++) {
1800       c->diag[i] = a->diag[i];
1801     }
1802   } else c->diag        = 0;
1803   c->s_nz               = a->s_nz;
1804   c->s_maxnz            = a->s_maxnz;
1805   c->solve_work         = 0;
1806   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1807   c->mult_work          = 0;
1808   *B = C;
1809   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 EXTERN_C_BEGIN
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1816 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1817 {
1818   Mat_SeqSBAIJ *a;
1819   Mat          B;
1820   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1821   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1822   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1823   int          *masked,nmask,tmp,bs2,ishift;
1824   PetscScalar  *aa;
1825   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1826 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
1827   PetscTruth   flag;
1828 #endif
1829 
1830   PetscFunctionBegin;
1831   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1832   bs2  = bs*bs;
1833 
1834   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1835   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1836   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1837   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1838   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1839   M = header[1]; N = header[2]; nz = header[3];
1840 
1841   if (header[3] < 0) {
1842     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1843   }
1844 
1845   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1846 
1847   /*
1848      This code adds extra rows to make sure the number of rows is
1849     divisible by the blocksize
1850   */
1851   mbs        = M/bs;
1852   extra_rows = bs - M + bs*(mbs);
1853   if (extra_rows == bs) extra_rows = 0;
1854   else                  mbs++;
1855   if (extra_rows) {
1856     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1857   }
1858 
1859   /* read in row lengths */
1860   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1861   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1862   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1863 
1864   /* read in column indices */
1865   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1866   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1867   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1868 
1869   /* loop over row lengths determining block row lengths */
1870   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1871   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1872   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1873   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1874   masked   = mask + mbs;
1875   rowcount = 0; nzcount = 0;
1876   for (i=0; i<mbs; i++) {
1877     nmask = 0;
1878     for (j=0; j<bs; j++) {
1879       kmax = rowlengths[rowcount];
1880       for (k=0; k<kmax; k++) {
1881         tmp = jj[nzcount++]/bs;   /* block col. index */
1882         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1883       }
1884       rowcount++;
1885     }
1886     s_browlengths[i] += nmask;
1887 
1888     /* zero out the mask elements we set */
1889     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1890   }
1891 
1892   /* create our matrix */
1893   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1894   B = *A;
1895   a = (Mat_SeqSBAIJ*)B->data;
1896 
1897   /* set matrix "i" values */
1898   a->i[0] = 0;
1899   for (i=1; i<= mbs; i++) {
1900     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1901     a->ilen[i-1] = s_browlengths[i-1];
1902   }
1903   a->s_nz = a->i[mbs];
1904 
1905   /* read in nonzero values */
1906   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1907   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1908   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1909 
1910   /* set "a" and "j" values into matrix */
1911   nzcount = 0; jcount = 0;
1912   for (i=0; i<mbs; i++) {
1913     nzcountb = nzcount;
1914     nmask    = 0;
1915     for (j=0; j<bs; j++) {
1916       kmax = rowlengths[i*bs+j];
1917       for (k=0; k<kmax; k++) {
1918         tmp = jj[nzcount++]/bs; /* block col. index */
1919         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1920       }
1921     }
1922     /* sort the masked values */
1923     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1924 
1925     /* set "j" values into matrix */
1926     maskcount = 1;
1927     for (j=0; j<nmask; j++) {
1928       a->j[jcount++]  = masked[j];
1929       mask[masked[j]] = maskcount++;
1930     }
1931 
1932     /* set "a" values into matrix */
1933     ishift = bs2*a->i[i];
1934     for (j=0; j<bs; j++) {
1935       kmax = rowlengths[i*bs+j];
1936       for (k=0; k<kmax; k++) {
1937         tmp       = jj[nzcountb]/bs ; /* block col. index */
1938         if (tmp >= i){
1939           block     = mask[tmp] - 1;
1940           point     = jj[nzcountb] - bs*tmp;
1941           idx       = ishift + bs2*block + j + bs*point;
1942           a->a[idx] = aa[nzcountb];
1943         }
1944         nzcountb++;
1945       }
1946     }
1947     /* zero out the mask elements we set */
1948     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1949   }
1950   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1951 
1952   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1953   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1954   ierr = PetscFree(aa);CHKERRQ(ierr);
1955   ierr = PetscFree(jj);CHKERRQ(ierr);
1956   ierr = PetscFree(mask);CHKERRQ(ierr);
1957 
1958   B->assembled = PETSC_TRUE;
1959 #if defined(PETSC_HAVE_SPOOLES)
1960   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
1961   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); }
1962 #endif
1963 #if defined(PETSC_HAVE_MUMPS)
1964   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
1965   if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); }
1966 #endif
1967   ierr = MatView_Private(B);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 EXTERN_C_END
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1974 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1975 {
1976   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1977   MatScalar    *aa=a->a,*v,*v1;
1978   PetscScalar  *x,*b,*t,sum,d;
1979   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1980   int          nz,nz1,*vj,*vj1,i;
1981 
1982   PetscFunctionBegin;
1983   its = its*lits;
1984   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1985 
1986   if (bs > 1)
1987     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1988 
1989   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1990   if (xx != bb) {
1991     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1992   } else {
1993     b = x;
1994   }
1995 
1996   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1997 
1998   if (flag & SOR_ZERO_INITIAL_GUESS) {
1999     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2000       for (i=0; i<m; i++)
2001         t[i] = b[i];
2002 
2003       for (i=0; i<m; i++){
2004         d  = *(aa + ai[i]);  /* diag[i] */
2005         v  = aa + ai[i] + 1;
2006         vj = aj + ai[i] + 1;
2007         nz = ai[i+1] - ai[i] - 1;
2008         x[i] = omega*t[i]/d;
2009         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2010         PetscLogFlops(2*nz-1);
2011       }
2012     }
2013 
2014     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2015       for (i=0; i<m; i++)
2016         t[i] = b[i];
2017 
2018       for (i=0; i<m-1; i++){  /* update rhs */
2019         v  = aa + ai[i] + 1;
2020         vj = aj + ai[i] + 1;
2021         nz = ai[i+1] - ai[i] - 1;
2022         while (nz--) t[*vj++] -= x[i]*(*v++);
2023         PetscLogFlops(2*nz-1);
2024       }
2025       for (i=m-1; i>=0; i--){
2026         d  = *(aa + ai[i]);
2027         v  = aa + ai[i] + 1;
2028         vj = aj + ai[i] + 1;
2029         nz = ai[i+1] - ai[i] - 1;
2030         sum = t[i];
2031         while (nz--) sum -= x[*vj++]*(*v++);
2032         PetscLogFlops(2*nz-1);
2033         x[i] =   (1-omega)*x[i] + omega*sum/d;
2034       }
2035     }
2036     its--;
2037   }
2038 
2039   while (its--) {
2040     /*
2041        forward sweep:
2042        for i=0,...,m-1:
2043          sum[i] = (b[i] - U(i,:)x )/d[i];
2044          x[i]   = (1-omega)x[i] + omega*sum[i];
2045          b      = b - x[i]*U^T(i,:);
2046 
2047     */
2048     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2049       for (i=0; i<m; i++)
2050         t[i] = b[i];
2051 
2052       for (i=0; i<m; i++){
2053         d  = *(aa + ai[i]);  /* diag[i] */
2054         v  = aa + ai[i] + 1; v1=v;
2055         vj = aj + ai[i] + 1; vj1=vj;
2056         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2057         sum = t[i];
2058         while (nz1--) sum -= (*v1++)*x[*vj1++];
2059         x[i] = (1-omega)*x[i] + omega*sum/d;
2060         while (nz--) t[*vj++] -= x[i]*(*v++);
2061         PetscLogFlops(4*nz-2);
2062       }
2063     }
2064 
2065   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2066       /*
2067        backward sweep:
2068        b = b - x[i]*U^T(i,:), i=0,...,n-2
2069        for i=m-1,...,0:
2070          sum[i] = (b[i] - U(i,:)x )/d[i];
2071          x[i]   = (1-omega)x[i] + omega*sum[i];
2072       */
2073       for (i=0; i<m; i++)
2074         t[i] = b[i];
2075 
2076       for (i=0; i<m-1; i++){  /* update rhs */
2077         v  = aa + ai[i] + 1;
2078         vj = aj + ai[i] + 1;
2079         nz = ai[i+1] - ai[i] - 1;
2080         while (nz--) t[*vj++] -= x[i]*(*v++);
2081         PetscLogFlops(2*nz-1);
2082       }
2083       for (i=m-1; i>=0; i--){
2084         d  = *(aa + ai[i]);
2085         v  = aa + ai[i] + 1;
2086         vj = aj + ai[i] + 1;
2087         nz = ai[i+1] - ai[i] - 1;
2088         sum = t[i];
2089         while (nz--) sum -= x[*vj++]*(*v++);
2090         PetscLogFlops(2*nz-1);
2091         x[i] =   (1-omega)*x[i] + omega*sum/d;
2092       }
2093     }
2094   }
2095 
2096   ierr = PetscFree(t); CHKERRQ(ierr);
2097   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2098   if (bb != xx) {
2099     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2100   }
2101 
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 
2106 
2107 
2108 
2109 
2110