xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 79cabe49f76bcf75c9d3cf8aa0e240caa139a178)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   default:
183     SETERRQ(PETSC_ERR_SUP,"unknown option");
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
191 {
192   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
193   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
194   MatScalar    *aa,*aa_i;
195   PetscScalar  *v_i;
196 
197   PetscFunctionBegin;
198   bs  = a->bs;
199   ai  = a->i;
200   aj  = a->j;
201   aa  = a->a;
202   bs2 = a->bs2;
203 
204   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
205 
206   bn  = row/bs;   /* Block number */
207   bp  = row % bs; /* Block position */
208   M   = ai[bn+1] - ai[bn];
209   *ncols = bs*M;
210 
211   if (v) {
212     *v = 0;
213     if (*ncols) {
214       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
215       for (i=0; i<M; i++) { /* for each block in the block row */
216         v_i  = *v + i*bs;
217         aa_i = aa + bs2*(ai[bn] + i);
218         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
219       }
220     }
221   }
222 
223   if (cols) {
224     *cols = 0;
225     if (*ncols) {
226       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
227       for (i=0; i<M; i++) { /* for each block in the block row */
228         cols_i = *cols + i*bs;
229         itmp  = bs*aj[ai[bn] + i];
230         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
231       }
232     }
233   }
234 
235   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
236   /* this segment is currently removed, so only entries in the upper triangle are obtained */
237 #ifdef column_search
238   v_i    = *v    + M*bs;
239   cols_i = *cols + M*bs;
240   for (i=0; i<bn; i++){ /* for each block row */
241     M = ai[i+1] - ai[i];
242     for (j=0; j<M; j++){
243       itmp = aj[ai[i] + j];    /* block column value */
244       if (itmp == bn){
245         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
246         for (k=0; k<bs; k++) {
247           *cols_i++ = i*bs+k;
248           *v_i++    = aa_i[k];
249         }
250         *ncols += bs;
251         break;
252       }
253     }
254   }
255 #endif
256 
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
263 {
264   int ierr;
265 
266   PetscFunctionBegin;
267   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
268   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
275 {
276   int ierr;
277   PetscFunctionBegin;
278   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
285 {
286   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
288   PetscScalar  *aa;
289   FILE         *file;
290 
291   PetscFunctionBegin;
292   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
293   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
294   col_lens[0] = MAT_FILE_COOKIE;
295 
296   col_lens[1] = A->m;
297   col_lens[2] = A->m;
298   col_lens[3] = a->s_nz*bs2;
299 
300   /* store lengths of each row and write (including header) to file */
301   count = 0;
302   for (i=0; i<a->mbs; i++) {
303     for (j=0; j<bs; j++) {
304       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
305     }
306   }
307   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
308   ierr = PetscFree(col_lens);CHKERRQ(ierr);
309 
310   /* store column indices (zero start index) */
311   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
312   count = 0;
313   for (i=0; i<a->mbs; i++) {
314     for (j=0; j<bs; j++) {
315       for (k=a->i[i]; k<a->i[i+1]; k++) {
316         for (l=0; l<bs; l++) {
317           jj[count++] = bs*a->j[k] + l;
318         }
319       }
320     }
321   }
322   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
323   ierr = PetscFree(jj);CHKERRQ(ierr);
324 
325   /* store nonzero values */
326   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
327   count = 0;
328   for (i=0; i<a->mbs; i++) {
329     for (j=0; j<bs; j++) {
330       for (k=a->i[i]; k<a->i[i+1]; k++) {
331         for (l=0; l<bs; l++) {
332           aa[count++] = a->a[bs2*k + l*bs + j];
333         }
334       }
335     }
336   }
337   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
338   ierr = PetscFree(aa);CHKERRQ(ierr);
339 
340   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
341   if (file) {
342     fprintf(file,"-matload_block_size %d\n",a->bs);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
350 {
351   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
352   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
353   char              *name;
354   PetscViewerFormat format;
355 
356   PetscFunctionBegin;
357   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
358   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
359   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
360     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
361   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
362     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
363   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
364     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
365     for (i=0; i<a->mbs; i++) {
366       for (j=0; j<bs; j++) {
367         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
368         for (k=a->i[i]; k<a->i[i+1]; k++) {
369           for (l=0; l<bs; l++) {
370 #if defined(PETSC_USE_COMPLEX)
371             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
372               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
373                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
374             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
379             }
380 #else
381             if (a->a[bs2*k + l*bs + j] != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
383             }
384 #endif
385           }
386         }
387         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
388       }
389     }
390     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
391   } else {
392     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
393     for (i=0; i<a->mbs; i++) {
394       for (j=0; j<bs; j++) {
395         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
396         for (k=a->i[i]; k<a->i[i+1]; k++) {
397           for (l=0; l<bs; l++) {
398 #if defined(PETSC_USE_COMPLEX)
399             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
400               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
401                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
402             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
407             }
408 #else
409             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
410 #endif
411           }
412         }
413         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
414       }
415     }
416     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
417   }
418   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
425 {
426   Mat          A = (Mat) Aa;
427   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
428   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
429   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
430   MatScalar    *aa;
431   MPI_Comm     comm;
432   PetscViewer  viewer;
433 
434   PetscFunctionBegin;
435   /*
436       This is nasty. If this is called from an originally parallel matrix
437    then all processes call this,but only the first has the matrix so the
438    rest should return immediately.
439   */
440   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
441   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
442   if (rank) PetscFunctionReturn(0);
443 
444   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
445 
446   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
447   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
448 
449   /* loop over matrix elements drawing boxes */
450   color = PETSC_DRAW_BLUE;
451   for (i=0,row=0; i<mbs; i++,row+=bs) {
452     for (j=a->i[i]; j<a->i[i+1]; j++) {
453       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
454       x_l = a->j[j]*bs; x_r = x_l + 1.0;
455       aa = a->a + j*bs2;
456       for (k=0; k<bs; k++) {
457         for (l=0; l<bs; l++) {
458           if (PetscRealPart(*aa++) >=  0.) continue;
459           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
460         }
461       }
462     }
463   }
464   color = PETSC_DRAW_CYAN;
465   for (i=0,row=0; i<mbs; i++,row+=bs) {
466     for (j=a->i[i]; j<a->i[i+1]; j++) {
467       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
468       x_l = a->j[j]*bs; x_r = x_l + 1.0;
469       aa = a->a + j*bs2;
470       for (k=0; k<bs; k++) {
471         for (l=0; l<bs; l++) {
472           if (PetscRealPart(*aa++) != 0.) continue;
473           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478 
479   color = PETSC_DRAW_RED;
480   for (i=0,row=0; i<mbs; i++,row+=bs) {
481     for (j=a->i[i]; j<a->i[i+1]; j++) {
482       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
483       x_l = a->j[j]*bs; x_r = x_l + 1.0;
484       aa = a->a + j*bs2;
485       for (k=0; k<bs; k++) {
486         for (l=0; l<bs; l++) {
487           if (PetscRealPart(*aa++) <= 0.) continue;
488           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
499 {
500   int          ierr;
501   PetscReal    xl,yl,xr,yr,w,h;
502   PetscDraw    draw;
503   PetscTruth   isnull;
504 
505   PetscFunctionBegin;
506 
507   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
508   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
509 
510   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
511   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
512   xr += w;    yr += h;  xl = -w;     yl = -h;
513   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
514   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
515   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatView_SeqSBAIJ"
521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
522 {
523   int        ierr;
524   PetscTruth issocket,isascii,isbinary,isdraw;
525 
526   PetscFunctionBegin;
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
531   if (issocket) {
532     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
533   } else if (isascii){
534     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
535   } else if (isbinary) {
536     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
537   } else if (isdraw) {
538     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
539   } else {
540     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
548 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
549 {
550   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
551   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
552   int        *ai = a->i,*ailen = a->ilen;
553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
554   MatScalar  *ap,*aa = a->a,zero = 0.0;
555 
556   PetscFunctionBegin;
557   for (k=0; k<m; k++) { /* loop over rows */
558     row  = im[k]; brow = row/bs;
559     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
560     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
561     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
562     nrow = ailen[brow];
563     for (l=0; l<n; l++) { /* loop over columns */
564       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
565       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
566       col  = in[l] ;
567       bcol = col/bs;
568       cidx = col%bs;
569       ridx = row%bs;
570       high = nrow;
571       low  = 0; /* assume unsorted */
572       while (high-low > 5) {
573         t = (low+high)/2;
574         if (rp[t] > bcol) high = t;
575         else             low  = t;
576       }
577       for (i=low; i<high; i++) {
578         if (rp[i] > bcol) break;
579         if (rp[i] == bcol) {
580           *v++ = ap[bs2*i+bs*cidx+ridx];
581           goto finished;
582         }
583       }
584       *v++ = zero;
585       finished:;
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
595 {
596   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
597   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
598   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
599   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
600   PetscTruth      roworiented=a->roworiented;
601   const MatScalar *value = v;
602   MatScalar       *ap,*aa = a->a,*bap;
603 
604   PetscFunctionBegin;
605   if (roworiented) {
606     stepval = (n-1)*bs;
607   } else {
608     stepval = (m-1)*bs;
609   }
610   for (k=0; k<m; k++) { /* loop over added rows */
611     row  = im[k];
612     if (row < 0) continue;
613 #if defined(PETSC_USE_BOPT_g)
614     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
615 #endif
616     rp   = aj + ai[row];
617     ap   = aa + bs2*ai[row];
618     rmax = imax[row];
619     nrow = ailen[row];
620     low  = 0;
621     for (l=0; l<n; l++) { /* loop over added columns */
622       if (in[l] < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
625 #endif
626       col = in[l];
627       if (col < row) continue; /* ignore lower triangular block */
628       if (roworiented) {
629         value = v + k*(stepval+bs)*bs + l*bs;
630       } else {
631         value = v + l*(stepval+bs)*bs + k*bs;
632       }
633       if (!sorted) low = 0; high = nrow;
634       while (high-low > 7) {
635         t = (low+high)/2;
636         if (rp[t] > col) high = t;
637         else             low  = t;
638       }
639       for (i=low; i<high; i++) {
640         if (rp[i] > col) break;
641         if (rp[i] == col) {
642           bap  = ap +  bs2*i;
643           if (roworiented) {
644             if (is == ADD_VALUES) {
645               for (ii=0; ii<bs; ii++,value+=stepval) {
646                 for (jj=ii; jj<bs2; jj+=bs) {
647                   bap[jj] += *value++;
648                 }
649               }
650             } else {
651               for (ii=0; ii<bs; ii++,value+=stepval) {
652                 for (jj=ii; jj<bs2; jj+=bs) {
653                   bap[jj] = *value++;
654                 }
655               }
656             }
657           } else {
658             if (is == ADD_VALUES) {
659               for (ii=0; ii<bs; ii++,value+=stepval) {
660                 for (jj=0; jj<bs; jj++) {
661                   *bap++ += *value++;
662                 }
663               }
664             } else {
665               for (ii=0; ii<bs; ii++,value+=stepval) {
666                 for (jj=0; jj<bs; jj++) {
667                   *bap++  = *value++;
668                 }
669               }
670             }
671           }
672           goto noinsert2;
673         }
674       }
675       if (nonew == 1) goto noinsert2;
676       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
677       if (nrow >= rmax) {
678         /* there is no extra room in row, therefore enlarge */
679         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
680         MatScalar *new_a;
681 
682         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
683 
684         /* malloc new storage space */
685         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
686 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
687         new_j   = (int*)(new_a + bs2*new_nz);
688         new_i   = new_j + new_nz;
689 
690         /* copy over old data into new slots */
691         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
692         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
693         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
694         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
695         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
697         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
698         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
699         /* free up old matrix storage */
700         ierr = PetscFree(a->a);CHKERRQ(ierr);
701         if (!a->singlemalloc) {
702           ierr = PetscFree(a->i);CHKERRQ(ierr);
703           ierr = PetscFree(a->j);CHKERRQ(ierr);
704         }
705         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
706         a->singlemalloc = PETSC_TRUE;
707 
708         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
709         rmax = imax[row] = imax[row] + CHUNKSIZE;
710         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
711         a->s_maxnz += bs2*CHUNKSIZE;
712         a->reallocs++;
713         a->s_nz++;
714       }
715       N = nrow++ - 1;
716       /* shift up all the later entries in this row */
717       for (ii=N; ii>=i; ii--) {
718         rp[ii+1] = rp[ii];
719         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
720       }
721       if (N >= i) {
722         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       rp[i] = col;
725       bap   = ap +  bs2*i;
726       if (roworiented) {
727         for (ii=0; ii<bs; ii++,value+=stepval) {
728           for (jj=ii; jj<bs2; jj+=bs) {
729             bap[jj] = *value++;
730           }
731         }
732       } else {
733         for (ii=0; ii<bs; ii++,value+=stepval) {
734           for (jj=0; jj<bs; jj++) {
735             *bap++  = *value++;
736           }
737         }
738       }
739       noinsert2:;
740       low = i;
741     }
742     ailen[row] = nrow;
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
749 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
750 {
751   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
752   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
753   int        m = A->m,*ip,N,*ailen = a->ilen;
754   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
755   MatScalar  *aa = a->a,*ap;
756 #if defined(PETSC_HAVE_MUMPS)
757   PetscTruth   flag;
758 #endif
759 
760   PetscFunctionBegin;
761   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
762 
763   if (m) rmax = ailen[0];
764   for (i=1; i<mbs; i++) {
765     /* move each row back by the amount of empty slots (fshift) before it*/
766     fshift += imax[i-1] - ailen[i-1];
767     rmax   = PetscMax(rmax,ailen[i]);
768     if (fshift) {
769       ip = aj + ai[i]; ap = aa + bs2*ai[i];
770       N = ailen[i];
771       for (j=0; j<N; j++) {
772         ip[j-fshift] = ip[j];
773         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
774       }
775     }
776     ai[i] = ai[i-1] + ailen[i-1];
777   }
778   if (mbs) {
779     fshift += imax[mbs-1] - ailen[mbs-1];
780     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
781   }
782   /* reset ilen and imax for each row */
783   for (i=0; i<mbs; i++) {
784     ailen[i] = imax[i] = ai[i+1] - ai[i];
785   }
786   a->s_nz = ai[mbs];
787 
788   /* diagonals may have moved, reset it */
789   if (a->diag) {
790     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
791   }
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
793            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
795            a->reallocs);
796   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
797   a->reallocs          = 0;
798   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
799 
800 #if defined(PETSC_HAVE_MUMPS)
801   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
802   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
803 #endif
804 
805   PetscFunctionReturn(0);
806 }
807 
808 /*
809    This function returns an array of flags which indicate the locations of contiguous
810    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
811    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
812    Assume: sizes should be long enough to hold all the values.
813 */
814 #undef __FUNCT__
815 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
816 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
817 {
818   int        i,j,k,row;
819   PetscTruth flg;
820 
821   PetscFunctionBegin;
822   for (i=0,j=0; i<n; j++) {
823     row = idx[i];
824     if (row%bs!=0) { /* Not the begining of a block */
825       sizes[j] = 1;
826       i++;
827     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
828       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
829       i++;
830     } else { /* Begining of the block, so check if the complete block exists */
831       flg = PETSC_TRUE;
832       for (k=1; k<bs; k++) {
833         if (row+k != idx[i+k]) { /* break in the block */
834           flg = PETSC_FALSE;
835           break;
836         }
837       }
838       if (flg == PETSC_TRUE) { /* No break in the bs */
839         sizes[j] = bs;
840         i+= bs;
841       } else {
842         sizes[j] = 1;
843         i++;
844       }
845     }
846   }
847   *bs_max = j;
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
853 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
854 {
855   PetscFunctionBegin;
856   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
857 }
858 
859 /* Only add/insert a(i,j) with i<=j (blocks).
860    Any a(i,j) with i>j input by user is ingored.
861 */
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
865 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
866 {
867   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
868   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
869   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
870   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
871   int         ridx,cidx,bs2=a->bs2,ierr;
872   MatScalar   *ap,value,*aa=a->a,*bap;
873 
874   PetscFunctionBegin;
875 
876   for (k=0; k<m; k++) { /* loop over added rows */
877     row  = im[k];       /* row number */
878     brow = row/bs;      /* block row number */
879     if (row < 0) continue;
880 #if defined(PETSC_USE_BOPT_g)
881     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
882 #endif
883     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
884     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
885     rmax = imax[brow];  /* maximum space allocated for this row */
886     nrow = ailen[brow]; /* actual length of this row */
887     low  = 0;
888 
889     for (l=0; l<n; l++) { /* loop over added columns */
890       if (in[l] < 0) continue;
891 #if defined(PETSC_USE_BOPT_g)
892       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
893 #endif
894       col = in[l];
895       bcol = col/bs;              /* block col number */
896 
897       if (brow <= bcol){
898         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
899         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
900           /* element value a(k,l) */
901           if (roworiented) {
902             value = v[l + k*n];
903           } else {
904             value = v[k + l*m];
905           }
906 
907           /* move pointer bap to a(k,l) quickly and add/insert value */
908           if (!sorted) low = 0; high = nrow;
909           while (high-low > 7) {
910             t = (low+high)/2;
911             if (rp[t] > bcol) high = t;
912             else              low  = t;
913           }
914           for (i=low; i<high; i++) {
915             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
916             if (rp[i] > bcol) break;
917             if (rp[i] == bcol) {
918               bap  = ap +  bs2*i + bs*cidx + ridx;
919               if (is == ADD_VALUES) *bap += value;
920               else                  *bap  = value;
921               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
922               if (brow == bcol && ridx < cidx){
923                 bap  = ap +  bs2*i + bs*ridx + cidx;
924                 if (is == ADD_VALUES) *bap += value;
925                 else                  *bap  = value;
926               }
927               goto noinsert1;
928             }
929           }
930 
931       if (nonew == 1) goto noinsert1;
932       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
933       if (nrow >= rmax) {
934         /* there is no extra room in row, therefore enlarge */
935         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
936         MatScalar *new_a;
937 
938         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
939 
940         /* Malloc new storage space */
941         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
942         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
943         new_j = (int*)(new_a + bs2*new_nz);
944         new_i = new_j + new_nz;
945 
946         /* copy over old data into new slots */
947         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
948         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
949         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
950         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
951         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
952         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
953         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
954         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
955         /* free up old matrix storage */
956         ierr = PetscFree(a->a);CHKERRQ(ierr);
957         if (!a->singlemalloc) {
958           ierr = PetscFree(a->i);CHKERRQ(ierr);
959           ierr = PetscFree(a->j);CHKERRQ(ierr);
960         }
961         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
962         a->singlemalloc = PETSC_TRUE;
963 
964         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
965         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
966         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
967         a->s_maxnz += bs2*CHUNKSIZE;
968         a->reallocs++;
969         a->s_nz++;
970       }
971 
972       N = nrow++ - 1;
973       /* shift up all the later entries in this row */
974       for (ii=N; ii>=i; ii--) {
975         rp[ii+1] = rp[ii];
976         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
977       }
978       if (N>=i) {
979         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
980       }
981       rp[i]                      = bcol;
982       ap[bs2*i + bs*cidx + ridx] = value;
983       noinsert1:;
984       low = i;
985       /* } */
986         }
987       } /* end of if .. if..  */
988     }                     /* end of loop over added columns */
989     ailen[brow] = nrow;
990   }                       /* end of loop over added rows */
991 
992   PetscFunctionReturn(0);
993 }
994 
995 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
996 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
997 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
998 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
999 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
1000 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1001 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1002 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
1003 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1004 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1005 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1006 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1007 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1008 extern int MatZeroEntries_SeqSBAIJ(Mat);
1009 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1010 
1011 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1026 
1027 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1028 
1029 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1037 
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1046 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1047 
1048 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1056 
1057 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1065 
1066 #ifdef HAVE_MatICCFactor
1067 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1070 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1071 {
1072   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1073   Mat         outA;
1074   int         ierr;
1075   PetscTruth  row_identity,col_identity;
1076 
1077   PetscFunctionBegin;
1078   /*
1079   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1080   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1081   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1082   if (!row_identity || !col_identity) {
1083     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1084   }
1085   */
1086 
1087   outA          = inA;
1088   inA->factor   = FACTOR_CHOLESKY;
1089 
1090   if (!a->diag) {
1091     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1092   }
1093   /*
1094       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1095       for ILU(0) factorization with natural ordering
1096   */
1097   switch (a->bs) {
1098   case 1:
1099     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1100     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1101     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1102     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1103   case 2:
1104     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1105     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1106     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1107     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1108     break;
1109   case 3:
1110     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1111     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1112     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1113     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1114     break;
1115   case 4:
1116     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1117     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1118     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1119     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1120     break;
1121   case 5:
1122     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1123     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1124     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1125     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1126     break;
1127   case 6:
1128     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1129     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1130     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1131     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1132     break;
1133   case 7:
1134     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1135     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1136     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1137     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1138     break;
1139   default:
1140     a->row        = row;
1141     a->icol       = col;
1142     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1143     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1144 
1145     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1146     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1147     PetscLogObjectParent(inA,a->icol);
1148 
1149     if (!a->solve_work) {
1150       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1151       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1152     }
1153   }
1154 
1155   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1156 
1157   PetscFunctionReturn(0);
1158 }
1159 #endif
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1163 int MatPrintHelp_SeqSBAIJ(Mat A)
1164 {
1165   static PetscTruth called = PETSC_FALSE;
1166   MPI_Comm          comm = A->comm;
1167   int               ierr;
1168 
1169   PetscFunctionBegin;
1170   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1171   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1172   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1179 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1180 {
1181   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1182   int         i,nz,n;
1183 
1184   PetscFunctionBegin;
1185   nz = baij->s_maxnz;
1186   n  = mat->n;
1187   for (i=0; i<nz; i++) {
1188     baij->j[i] = indices[i];
1189   }
1190   baij->s_nz = nz;
1191   for (i=0; i<n; i++) {
1192     baij->ilen[i] = baij->imax[i];
1193   }
1194 
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1201 /*@
1202     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1203        in the matrix.
1204 
1205   Input Parameters:
1206 +  mat     - the SeqSBAIJ matrix
1207 -  indices - the column indices
1208 
1209   Level: advanced
1210 
1211   Notes:
1212     This can be called if you have precomputed the nonzero structure of the
1213   matrix and want to provide it to the matrix object to improve the performance
1214   of the MatSetValues() operation.
1215 
1216     You MUST have set the correct numbers of nonzeros per row in the call to
1217   MatCreateSeqSBAIJ().
1218 
1219     MUST be called before any calls to MatSetValues()
1220 
1221 .seealso: MatCreateSeqSBAIJ
1222 @*/
1223 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1224 {
1225   int ierr,(*f)(Mat,int *);
1226 
1227   PetscFunctionBegin;
1228   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1229   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1230   if (f) {
1231     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1232   } else {
1233     SETERRQ(1,"Wrong type of matrix to set column indices");
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1240 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1241 {
1242   int        ierr;
1243 
1244   PetscFunctionBegin;
1245   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1251 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1252 {
1253   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1254   PetscFunctionBegin;
1255   *array = a->a;
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1261 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1262 {
1263   PetscFunctionBegin;
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #include "petscblaslapack.h"
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1270 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1271 {
1272   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1273   int          ierr,one=1,i,bs=y->bs,bs2,j;
1274 
1275   PetscFunctionBegin;
1276   if (str == SAME_NONZERO_PATTERN) {
1277     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1278   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1279     if (y->xtoy && y->XtoY != X) {
1280       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1281       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1282     }
1283     if (!y->xtoy) { /* get xtoy */
1284       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1285       y->XtoY = X;
1286     }
1287     bs2 = bs*bs;
1288     for (i=0; i<x->s_nz; i++) {
1289       j = 0;
1290       while (j < bs2){
1291         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1292         j++;
1293       }
1294     }
1295     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1296   } else {
1297     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /* -------------------------------------------------------------------*/
1303 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1304        MatGetRow_SeqSBAIJ,
1305        MatRestoreRow_SeqSBAIJ,
1306        MatMult_SeqSBAIJ_N,
1307        MatMultAdd_SeqSBAIJ_N,
1308        MatMultTranspose_SeqSBAIJ,
1309        MatMultTransposeAdd_SeqSBAIJ,
1310        MatSolve_SeqSBAIJ_N,
1311        0,
1312        0,
1313        0,
1314        0,
1315        MatCholeskyFactor_SeqSBAIJ,
1316        MatRelax_SeqSBAIJ,
1317        MatTranspose_SeqSBAIJ,
1318        MatGetInfo_SeqSBAIJ,
1319        MatEqual_SeqSBAIJ,
1320        MatGetDiagonal_SeqSBAIJ,
1321        MatDiagonalScale_SeqSBAIJ,
1322        MatNorm_SeqSBAIJ,
1323        0,
1324        MatAssemblyEnd_SeqSBAIJ,
1325        0,
1326        MatSetOption_SeqSBAIJ,
1327        MatZeroEntries_SeqSBAIJ,
1328        MatZeroRows_SeqSBAIJ,
1329        0,
1330        0,
1331        MatCholeskyFactorSymbolic_SeqSBAIJ,
1332        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1333        MatSetUpPreallocation_SeqSBAIJ,
1334        0,
1335        MatICCFactorSymbolic_SeqSBAIJ,
1336        MatGetArray_SeqSBAIJ,
1337        MatRestoreArray_SeqSBAIJ,
1338        MatDuplicate_SeqSBAIJ,
1339        0,
1340        0,
1341        0,
1342        0,
1343        MatAXPY_SeqSBAIJ,
1344        MatGetSubMatrices_SeqSBAIJ,
1345        MatIncreaseOverlap_SeqSBAIJ,
1346        MatGetValues_SeqSBAIJ,
1347        0,
1348        MatPrintHelp_SeqSBAIJ,
1349        MatScale_SeqSBAIJ,
1350        0,
1351        0,
1352        0,
1353        MatGetBlockSize_SeqSBAIJ,
1354        MatGetRowIJ_SeqSBAIJ,
1355        MatRestoreRowIJ_SeqSBAIJ,
1356        0,
1357        0,
1358        0,
1359        0,
1360        0,
1361        0,
1362        MatSetValuesBlocked_SeqSBAIJ,
1363        MatGetSubMatrix_SeqSBAIJ,
1364        0,
1365        0,
1366        MatGetPetscMaps_Petsc,
1367        0,
1368        0,
1369        0,
1370        0,
1371        0,
1372        0,
1373        MatGetRowMax_SeqSBAIJ,
1374        0,
1375        0,
1376        0,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386        0,
1387 #if !defined(PETSC_USE_COMPLEX)
1388        MatGetInertia_SeqSBAIJ
1389 #else
1390        0
1391 #endif
1392 };
1393 
1394 EXTERN_C_BEGIN
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1397 int MatStoreValues_SeqSBAIJ(Mat mat)
1398 {
1399   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1400   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1401   int         ierr;
1402 
1403   PetscFunctionBegin;
1404   if (aij->nonew != 1) {
1405     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1406   }
1407 
1408   /* allocate space for values if not already there */
1409   if (!aij->saved_values) {
1410     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1411   }
1412 
1413   /* copy values over */
1414   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 EXTERN_C_END
1418 
1419 EXTERN_C_BEGIN
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1422 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1423 {
1424   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1425   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1426 
1427   PetscFunctionBegin;
1428   if (aij->nonew != 1) {
1429     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1430   }
1431   if (!aij->saved_values) {
1432     SETERRQ(1,"Must call MatStoreValues(A);first");
1433   }
1434 
1435   /* copy values over */
1436   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 EXTERN_C_END
1440 
1441 EXTERN_C_BEGIN
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1444 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1445 {
1446   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1447   int          i,len,ierr,mbs,bs2;
1448   PetscTruth   flg;
1449   int          s_nz;
1450 
1451   PetscFunctionBegin;
1452   B->preallocated = PETSC_TRUE;
1453   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1454   mbs  = B->m/bs;
1455   bs2  = bs*bs;
1456 
1457   if (mbs*bs != B->m) {
1458     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1459   }
1460 
1461   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1462   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1463   if (nnz) {
1464     for (i=0; i<mbs; i++) {
1465       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1466       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1467     }
1468   }
1469 
1470   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1471   if (!flg) {
1472     switch (bs) {
1473     case 1:
1474       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1475       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1476       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1477       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1478       B->ops->mult            = MatMult_SeqSBAIJ_1;
1479       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1480       break;
1481     case 2:
1482       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1483       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1484       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1485       B->ops->mult            = MatMult_SeqSBAIJ_2;
1486       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1487       break;
1488     case 3:
1489       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1490       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1491       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1492       B->ops->mult            = MatMult_SeqSBAIJ_3;
1493       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1494       break;
1495     case 4:
1496       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1497       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1498       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1499       B->ops->mult            = MatMult_SeqSBAIJ_4;
1500       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1501       break;
1502     case 5:
1503       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1504       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1505       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1506       B->ops->mult            = MatMult_SeqSBAIJ_5;
1507       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1508       break;
1509     case 6:
1510       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1511       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1512       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1513       B->ops->mult            = MatMult_SeqSBAIJ_6;
1514       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1515       break;
1516     case 7:
1517       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1518       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1519       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1520       B->ops->mult            = MatMult_SeqSBAIJ_7;
1521       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1522       break;
1523     }
1524   }
1525 
1526   b->mbs = mbs;
1527   b->nbs = mbs;
1528   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1529   if (!nnz) {
1530     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1531     else if (nz <= 0)        nz = 1;
1532     for (i=0; i<mbs; i++) {
1533       b->imax[i] = nz;
1534     }
1535     nz = nz*mbs; /* total nz */
1536   } else {
1537     nz = 0;
1538     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1539   }
1540   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1541   s_nz = nz;
1542 
1543   /* allocate the matrix space */
1544   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1545   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1546   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1547   b->j = (int*)(b->a + s_nz*bs2);
1548   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1549   b->i = b->j + s_nz;
1550   b->singlemalloc = PETSC_TRUE;
1551 
1552   /* pointer to beginning of each row */
1553   b->i[0] = 0;
1554   for (i=1; i<mbs+1; i++) {
1555     b->i[i] = b->i[i-1] + b->imax[i-1];
1556   }
1557 
1558   /* b->ilen will count nonzeros in each block row so far. */
1559   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1560   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1561   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1562 
1563   b->bs               = bs;
1564   b->bs2              = bs2;
1565   b->s_nz             = 0;
1566   b->s_maxnz          = s_nz*bs2;
1567 
1568   b->inew             = 0;
1569   b->jnew             = 0;
1570   b->anew             = 0;
1571   b->a2anew           = 0;
1572   b->permute          = PETSC_FALSE;
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1580 int MatCreate_SeqSBAIJ(Mat B)
1581 {
1582   Mat_SeqSBAIJ *b;
1583   int          ierr,size;
1584 
1585   PetscFunctionBegin;
1586   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1587   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1588   B->m = B->M = PetscMax(B->m,B->M);
1589   B->n = B->N = PetscMax(B->n,B->N);
1590 
1591   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1592   B->data = (void*)b;
1593   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1594   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1595   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1596   B->ops->view        = MatView_SeqSBAIJ;
1597   B->factor           = 0;
1598   B->lupivotthreshold = 1.0;
1599   B->mapping          = 0;
1600   b->row              = 0;
1601   b->icol             = 0;
1602   b->reallocs         = 0;
1603   b->saved_values     = 0;
1604 
1605   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1606   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1607 
1608   b->sorted           = PETSC_FALSE;
1609   b->roworiented      = PETSC_TRUE;
1610   b->nonew            = 0;
1611   b->diag             = 0;
1612   b->solve_work       = 0;
1613   b->mult_work        = 0;
1614   B->spptr            = 0;
1615   b->keepzeroedrows   = PETSC_FALSE;
1616   b->xtoy             = 0;
1617   b->XtoY             = 0;
1618 
1619   b->inew             = 0;
1620   b->jnew             = 0;
1621   b->anew             = 0;
1622   b->a2anew           = 0;
1623   b->permute          = PETSC_FALSE;
1624 
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1626                                      "MatStoreValues_SeqSBAIJ",
1627                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1629                                      "MatRetrieveValues_SeqSBAIJ",
1630                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1632                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1633                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1635                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1636                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 EXTERN_C_END
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1643 /*@C
1644    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1645    compressed row) format.  For good matrix assembly performance the
1646    user should preallocate the matrix storage by setting the parameter nz
1647    (or the array nnz).  By setting these parameters accurately, performance
1648    during matrix assembly can be increased by more than a factor of 50.
1649 
1650    Collective on Mat
1651 
1652    Input Parameters:
1653 +  A - the symmetric matrix
1654 .  bs - size of block
1655 .  nz - number of block nonzeros per block row (same for all rows)
1656 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1657          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1658 
1659    Options Database Keys:
1660 .   -mat_no_unroll - uses code that does not unroll the loops in the
1661                      block calculations (much slower)
1662 .    -mat_block_size - size of the blocks to use
1663 
1664    Level: intermediate
1665 
1666    Notes:
1667    Specify the preallocated storage with either nz or nnz (not both).
1668    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1669    allocation.  For additional details, see the users manual chapter on
1670    matrices.
1671 
1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1673 @*/
1674 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1675   int ierr,(*f)(Mat,int,int,const int[]);
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1679   if (f) {
1680     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1681   }
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatCreateSeqSBAIJ"
1687 /*@C
1688    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1689    compressed row) format.  For good matrix assembly performance the
1690    user should preallocate the matrix storage by setting the parameter nz
1691    (or the array nnz).  By setting these parameters accurately, performance
1692    during matrix assembly can be increased by more than a factor of 50.
1693 
1694    Collective on MPI_Comm
1695 
1696    Input Parameters:
1697 +  comm - MPI communicator, set to PETSC_COMM_SELF
1698 .  bs - size of block
1699 .  m - number of rows, or number of columns
1700 .  nz - number of block nonzeros per block row (same for all rows)
1701 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1702          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1703 
1704    Output Parameter:
1705 .  A - the symmetric matrix
1706 
1707    Options Database Keys:
1708 .   -mat_no_unroll - uses code that does not unroll the loops in the
1709                      block calculations (much slower)
1710 .    -mat_block_size - size of the blocks to use
1711 
1712    Level: intermediate
1713 
1714    Notes:
1715 
1716    Specify the preallocated storage with either nz or nnz (not both).
1717    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1718    allocation.  For additional details, see the users manual chapter on
1719    matrices.
1720 
1721 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1722 @*/
1723 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1724 {
1725   int ierr;
1726 
1727   PetscFunctionBegin;
1728   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1729   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1730   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1736 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1737 {
1738   Mat         C;
1739   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1740   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1741 
1742   PetscFunctionBegin;
1743   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1744 
1745   *B = 0;
1746   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1747   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1748   c    = (Mat_SeqSBAIJ*)C->data;
1749 
1750   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   C->preallocated   = PETSC_TRUE;
1752   C->factor         = A->factor;
1753   c->row            = 0;
1754   c->icol           = 0;
1755   c->saved_values   = 0;
1756   c->keepzeroedrows = a->keepzeroedrows;
1757   C->assembled      = PETSC_TRUE;
1758 
1759   c->bs         = a->bs;
1760   c->bs2        = a->bs2;
1761   c->mbs        = a->mbs;
1762   c->nbs        = a->nbs;
1763 
1764   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1765   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1766   for (i=0; i<mbs; i++) {
1767     c->imax[i] = a->imax[i];
1768     c->ilen[i] = a->ilen[i];
1769   }
1770 
1771   /* allocate the matrix space */
1772   c->singlemalloc = PETSC_TRUE;
1773   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1774   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1775   c->j = (int*)(c->a + nz*bs2);
1776   c->i = c->j + nz;
1777   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1778   if (mbs > 0) {
1779     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1780     if (cpvalues == MAT_COPY_VALUES) {
1781       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1782     } else {
1783       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1784     }
1785   }
1786 
1787   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1788   c->sorted      = a->sorted;
1789   c->roworiented = a->roworiented;
1790   c->nonew       = a->nonew;
1791 
1792   if (a->diag) {
1793     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1794     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1795     for (i=0; i<mbs; i++) {
1796       c->diag[i] = a->diag[i];
1797     }
1798   } else c->diag        = 0;
1799   c->s_nz               = a->s_nz;
1800   c->s_maxnz            = a->s_maxnz;
1801   c->solve_work         = 0;
1802   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1803   c->mult_work          = 0;
1804   *B = C;
1805   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 EXTERN_C_BEGIN
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1812 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1813 {
1814   Mat_SeqSBAIJ *a;
1815   Mat          B;
1816   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1817   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1818   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1819   int          *masked,nmask,tmp,bs2,ishift;
1820   PetscScalar  *aa;
1821   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1822 #if defined(PETSC_HAVE_MUMPS)
1823   PetscTruth   flag;
1824 #endif
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1828   bs2  = bs*bs;
1829 
1830   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1831   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1832   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1833   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1834   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1835   M = header[1]; N = header[2]; nz = header[3];
1836 
1837   if (header[3] < 0) {
1838     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1839   }
1840 
1841   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1842 
1843   /*
1844      This code adds extra rows to make sure the number of rows is
1845     divisible by the blocksize
1846   */
1847   mbs        = M/bs;
1848   extra_rows = bs - M + bs*(mbs);
1849   if (extra_rows == bs) extra_rows = 0;
1850   else                  mbs++;
1851   if (extra_rows) {
1852     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1853   }
1854 
1855   /* read in row lengths */
1856   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1857   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1858   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1859 
1860   /* read in column indices */
1861   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1862   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1863   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1864 
1865   /* loop over row lengths determining block row lengths */
1866   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1867   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1868   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1869   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1870   masked   = mask + mbs;
1871   rowcount = 0; nzcount = 0;
1872   for (i=0; i<mbs; i++) {
1873     nmask = 0;
1874     for (j=0; j<bs; j++) {
1875       kmax = rowlengths[rowcount];
1876       for (k=0; k<kmax; k++) {
1877         tmp = jj[nzcount++]/bs;   /* block col. index */
1878         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1879       }
1880       rowcount++;
1881     }
1882     s_browlengths[i] += nmask;
1883 
1884     /* zero out the mask elements we set */
1885     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1886   }
1887 
1888   /* create our matrix */
1889   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1890   ierr = MatSetType(B,type);CHKERRQ(ierr);
1891   ierr = MatSetSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1892   a = (Mat_SeqSBAIJ*)B->data;
1893 
1894   /* set matrix "i" values */
1895   a->i[0] = 0;
1896   for (i=1; i<= mbs; i++) {
1897     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1898     a->ilen[i-1] = s_browlengths[i-1];
1899   }
1900   a->s_nz = a->i[mbs];
1901 
1902   /* read in nonzero values */
1903   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1904   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1905   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1906 
1907   /* set "a" and "j" values into matrix */
1908   nzcount = 0; jcount = 0;
1909   for (i=0; i<mbs; i++) {
1910     nzcountb = nzcount;
1911     nmask    = 0;
1912     for (j=0; j<bs; j++) {
1913       kmax = rowlengths[i*bs+j];
1914       for (k=0; k<kmax; k++) {
1915         tmp = jj[nzcount++]/bs; /* block col. index */
1916         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1917       }
1918     }
1919     /* sort the masked values */
1920     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1921 
1922     /* set "j" values into matrix */
1923     maskcount = 1;
1924     for (j=0; j<nmask; j++) {
1925       a->j[jcount++]  = masked[j];
1926       mask[masked[j]] = maskcount++;
1927     }
1928 
1929     /* set "a" values into matrix */
1930     ishift = bs2*a->i[i];
1931     for (j=0; j<bs; j++) {
1932       kmax = rowlengths[i*bs+j];
1933       for (k=0; k<kmax; k++) {
1934         tmp       = jj[nzcountb]/bs ; /* block col. index */
1935         if (tmp >= i){
1936           block     = mask[tmp] - 1;
1937           point     = jj[nzcountb] - bs*tmp;
1938           idx       = ishift + bs2*block + j + bs*point;
1939           a->a[idx] = aa[nzcountb];
1940         }
1941         nzcountb++;
1942       }
1943     }
1944     /* zero out the mask elements we set */
1945     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1946   }
1947   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1948 
1949   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1950   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1951   ierr = PetscFree(aa);CHKERRQ(ierr);
1952   ierr = PetscFree(jj);CHKERRQ(ierr);
1953   ierr = PetscFree(mask);CHKERRQ(ierr);
1954 
1955   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957 #if defined(PETSC_HAVE_MUMPS)
1958   ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
1959   if (flag) { ierr = MatUseMUMPS_MPIAIJ(B);CHKERRQ(ierr); }
1960 #endif
1961   ierr = MatView_Private(B);CHKERRQ(ierr);
1962   *A = B;
1963   PetscFunctionReturn(0);
1964 }
1965 EXTERN_C_END
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1969 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1970 {
1971   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1972   MatScalar    *aa=a->a,*v,*v1;
1973   PetscScalar  *x,*b,*t,sum,d;
1974   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1975   int          nz,nz1,*vj,*vj1,i;
1976 
1977   PetscFunctionBegin;
1978   its = its*lits;
1979   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1980 
1981   if (bs > 1)
1982     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1983 
1984   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1985   if (xx != bb) {
1986     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1987   } else {
1988     b = x;
1989   }
1990 
1991   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1992 
1993   if (flag & SOR_ZERO_INITIAL_GUESS) {
1994     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1995       for (i=0; i<m; i++)
1996         t[i] = b[i];
1997 
1998       for (i=0; i<m; i++){
1999         d  = *(aa + ai[i]);  /* diag[i] */
2000         v  = aa + ai[i] + 1;
2001         vj = aj + ai[i] + 1;
2002         nz = ai[i+1] - ai[i] - 1;
2003         x[i] = omega*t[i]/d;
2004         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2005         PetscLogFlops(2*nz-1);
2006       }
2007     }
2008 
2009     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2010       for (i=0; i<m; i++)
2011         t[i] = b[i];
2012 
2013       for (i=0; i<m-1; i++){  /* update rhs */
2014         v  = aa + ai[i] + 1;
2015         vj = aj + ai[i] + 1;
2016         nz = ai[i+1] - ai[i] - 1;
2017         while (nz--) t[*vj++] -= x[i]*(*v++);
2018         PetscLogFlops(2*nz-1);
2019       }
2020       for (i=m-1; i>=0; i--){
2021         d  = *(aa + ai[i]);
2022         v  = aa + ai[i] + 1;
2023         vj = aj + ai[i] + 1;
2024         nz = ai[i+1] - ai[i] - 1;
2025         sum = t[i];
2026         while (nz--) sum -= x[*vj++]*(*v++);
2027         PetscLogFlops(2*nz-1);
2028         x[i] =   (1-omega)*x[i] + omega*sum/d;
2029       }
2030     }
2031     its--;
2032   }
2033 
2034   while (its--) {
2035     /*
2036        forward sweep:
2037        for i=0,...,m-1:
2038          sum[i] = (b[i] - U(i,:)x )/d[i];
2039          x[i]   = (1-omega)x[i] + omega*sum[i];
2040          b      = b - x[i]*U^T(i,:);
2041 
2042     */
2043     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2044       for (i=0; i<m; i++)
2045         t[i] = b[i];
2046 
2047       for (i=0; i<m; i++){
2048         d  = *(aa + ai[i]);  /* diag[i] */
2049         v  = aa + ai[i] + 1; v1=v;
2050         vj = aj + ai[i] + 1; vj1=vj;
2051         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2052         sum = t[i];
2053         while (nz1--) sum -= (*v1++)*x[*vj1++];
2054         x[i] = (1-omega)*x[i] + omega*sum/d;
2055         while (nz--) t[*vj++] -= x[i]*(*v++);
2056         PetscLogFlops(4*nz-2);
2057       }
2058     }
2059 
2060   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2061       /*
2062        backward sweep:
2063        b = b - x[i]*U^T(i,:), i=0,...,n-2
2064        for i=m-1,...,0:
2065          sum[i] = (b[i] - U(i,:)x )/d[i];
2066          x[i]   = (1-omega)x[i] + omega*sum[i];
2067       */
2068       for (i=0; i<m; i++)
2069         t[i] = b[i];
2070 
2071       for (i=0; i<m-1; i++){  /* update rhs */
2072         v  = aa + ai[i] + 1;
2073         vj = aj + ai[i] + 1;
2074         nz = ai[i+1] - ai[i] - 1;
2075         while (nz--) t[*vj++] -= x[i]*(*v++);
2076         PetscLogFlops(2*nz-1);
2077       }
2078       for (i=m-1; i>=0; i--){
2079         d  = *(aa + ai[i]);
2080         v  = aa + ai[i] + 1;
2081         vj = aj + ai[i] + 1;
2082         nz = ai[i+1] - ai[i] - 1;
2083         sum = t[i];
2084         while (nz--) sum -= x[*vj++]*(*v++);
2085         PetscLogFlops(2*nz-1);
2086         x[i] =   (1-omega)*x[i] + omega*sum/d;
2087       }
2088     }
2089   }
2090 
2091   ierr = PetscFree(t); CHKERRQ(ierr);
2092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2093   if (bb != xx) {
2094     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2095   }
2096 
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 
2101 
2102 
2103 
2104 
2105