xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 3e0d88b5a6015c3d4c47d785ae616615c3a35045)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
124   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
125   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
126   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
127 
128   if (a->inew){
129     ierr = PetscFree(a->inew);CHKERRQ(ierr);
130     a->inew = 0;
131   }
132   ierr = PetscFree(a);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
139 {
140   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
141 
142   PetscFunctionBegin;
143   switch (op) {
144   case MAT_ROW_ORIENTED:
145     a->roworiented    = PETSC_TRUE;
146     break;
147   case MAT_COLUMN_ORIENTED:
148     a->roworiented    = PETSC_FALSE;
149     break;
150   case MAT_COLUMNS_SORTED:
151     a->sorted         = PETSC_TRUE;
152     break;
153   case MAT_COLUMNS_UNSORTED:
154     a->sorted         = PETSC_FALSE;
155     break;
156   case MAT_KEEP_ZEROED_ROWS:
157     a->keepzeroedrows = PETSC_TRUE;
158     break;
159   case MAT_NO_NEW_NONZERO_LOCATIONS:
160     a->nonew          = 1;
161     break;
162   case MAT_NEW_NONZERO_LOCATION_ERR:
163     a->nonew          = -1;
164     break;
165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
166     a->nonew          = -2;
167     break;
168   case MAT_YES_NEW_NONZERO_LOCATIONS:
169     a->nonew          = 0;
170     break;
171   case MAT_ROWS_SORTED:
172   case MAT_ROWS_UNSORTED:
173   case MAT_YES_NEW_DIAGONALS:
174   case MAT_IGNORE_OFF_PROC_ENTRIES:
175   case MAT_USE_HASH_TABLE:
176   case MAT_USE_SINGLE_PRECISION_SOLVES:
177     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
178     break;
179   case MAT_NO_NEW_DIAGONALS:
180     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
181   default:
182     SETERRQ(PETSC_ERR_SUP,"unknown option");
183   }
184   PetscFunctionReturn(0);
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
189 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
190 {
191   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
192   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
193   MatScalar    *aa,*aa_i;
194   PetscScalar  *v_i;
195 
196   PetscFunctionBegin;
197   bs  = a->bs;
198   ai  = a->i;
199   aj  = a->j;
200   aa  = a->a;
201   bs2 = a->bs2;
202 
203   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
204 
205   bn  = row/bs;   /* Block number */
206   bp  = row % bs; /* Block position */
207   M   = ai[bn+1] - ai[bn];
208   *ncols = bs*M;
209 
210   if (v) {
211     *v = 0;
212     if (*ncols) {
213       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
214       for (i=0; i<M; i++) { /* for each block in the block row */
215         v_i  = *v + i*bs;
216         aa_i = aa + bs2*(ai[bn] + i);
217         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
218       }
219     }
220   }
221 
222   if (cols) {
223     *cols = 0;
224     if (*ncols) {
225       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
226       for (i=0; i<M; i++) { /* for each block in the block row */
227         cols_i = *cols + i*bs;
228         itmp  = bs*aj[ai[bn] + i];
229         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
230       }
231     }
232   }
233 
234   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
235   /* this segment is currently removed, so only entries in the upper triangle are obtained */
236 #ifdef column_search
237   v_i    = *v    + M*bs;
238   cols_i = *cols + M*bs;
239   for (i=0; i<bn; i++){ /* for each block row */
240     M = ai[i+1] - ai[i];
241     for (j=0; j<M; j++){
242       itmp = aj[ai[i] + j];    /* block column value */
243       if (itmp == bn){
244         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
245         for (k=0; k<bs; k++) {
246           *cols_i++ = i*bs+k;
247           *v_i++    = aa_i[k];
248         }
249         *ncols += bs;
250         break;
251       }
252     }
253   }
254 #endif
255 
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
261 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
262 {
263   int ierr;
264 
265   PetscFunctionBegin;
266   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
267   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
273 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
274 {
275   int ierr;
276   PetscFunctionBegin;
277   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
283 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
284 {
285   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
286   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
287   PetscScalar  *aa;
288   FILE         *file;
289 
290   PetscFunctionBegin;
291   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
292   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
293   col_lens[0] = MAT_FILE_COOKIE;
294 
295   col_lens[1] = A->m;
296   col_lens[2] = A->m;
297   col_lens[3] = a->s_nz*bs2;
298 
299   /* store lengths of each row and write (including header) to file */
300   count = 0;
301   for (i=0; i<a->mbs; i++) {
302     for (j=0; j<bs; j++) {
303       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
304     }
305   }
306   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
307   ierr = PetscFree(col_lens);CHKERRQ(ierr);
308 
309   /* store column indices (zero start index) */
310   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       for (k=a->i[i]; k<a->i[i+1]; k++) {
315         for (l=0; l<bs; l++) {
316           jj[count++] = bs*a->j[k] + l;
317         }
318       }
319     }
320   }
321   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
322   ierr = PetscFree(jj);CHKERRQ(ierr);
323 
324   /* store nonzero values */
325   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
326   count = 0;
327   for (i=0; i<a->mbs; i++) {
328     for (j=0; j<bs; j++) {
329       for (k=a->i[i]; k<a->i[i+1]; k++) {
330         for (l=0; l<bs; l++) {
331           aa[count++] = a->a[bs2*k + l*bs + j];
332         }
333       }
334     }
335   }
336   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
337   ierr = PetscFree(aa);CHKERRQ(ierr);
338 
339   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
340   if (file) {
341     fprintf(file,"-matload_block_size %d\n",a->bs);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
348 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
349 {
350   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
351   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
352   char              *name;
353   PetscViewerFormat format;
354 
355   PetscFunctionBegin;
356   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
357   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
358   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
359     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
360   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
361     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
362   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
363     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
364     for (i=0; i<a->mbs; i++) {
365       for (j=0; j<bs; j++) {
366         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
367         for (k=a->i[i]; k<a->i[i+1]; k++) {
368           for (l=0; l<bs; l++) {
369 #if defined(PETSC_USE_COMPLEX)
370             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
371               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
372                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
373             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
374               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
375                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
376             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
377               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
378             }
379 #else
380             if (a->a[bs2*k + l*bs + j] != 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
382             }
383 #endif
384           }
385         }
386         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
387       }
388     }
389     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
390   } else {
391     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
392     for (i=0; i<a->mbs; i++) {
393       for (j=0; j<bs; j++) {
394         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
395         for (k=a->i[i]; k<a->i[i+1]; k++) {
396           for (l=0; l<bs; l++) {
397 #if defined(PETSC_USE_COMPLEX)
398             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
399               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
400                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
401             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
402               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
403                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
404             } else {
405               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
406             }
407 #else
408             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
409 #endif
410           }
411         }
412         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
413       }
414     }
415     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
416   }
417   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
423 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
424 {
425   Mat          A = (Mat) Aa;
426   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
427   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
428   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
429   MatScalar    *aa;
430   MPI_Comm     comm;
431   PetscViewer  viewer;
432 
433   PetscFunctionBegin;
434   /*
435       This is nasty. If this is called from an originally parallel matrix
436    then all processes call this,but only the first has the matrix so the
437    rest should return immediately.
438   */
439   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
440   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
441   if (rank) PetscFunctionReturn(0);
442 
443   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
444 
445   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
446   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
447 
448   /* loop over matrix elements drawing boxes */
449   color = PETSC_DRAW_BLUE;
450   for (i=0,row=0; i<mbs; i++,row+=bs) {
451     for (j=a->i[i]; j<a->i[i+1]; j++) {
452       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
453       x_l = a->j[j]*bs; x_r = x_l + 1.0;
454       aa = a->a + j*bs2;
455       for (k=0; k<bs; k++) {
456         for (l=0; l<bs; l++) {
457           if (PetscRealPart(*aa++) >=  0.) continue;
458           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
459         }
460       }
461     }
462   }
463   color = PETSC_DRAW_CYAN;
464   for (i=0,row=0; i<mbs; i++,row+=bs) {
465     for (j=a->i[i]; j<a->i[i+1]; j++) {
466       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
467       x_l = a->j[j]*bs; x_r = x_l + 1.0;
468       aa = a->a + j*bs2;
469       for (k=0; k<bs; k++) {
470         for (l=0; l<bs; l++) {
471           if (PetscRealPart(*aa++) != 0.) continue;
472           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
473         }
474       }
475     }
476   }
477 
478   color = PETSC_DRAW_RED;
479   for (i=0,row=0; i<mbs; i++,row+=bs) {
480     for (j=a->i[i]; j<a->i[i+1]; j++) {
481       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
482       x_l = a->j[j]*bs; x_r = x_l + 1.0;
483       aa = a->a + j*bs2;
484       for (k=0; k<bs; k++) {
485         for (l=0; l<bs; l++) {
486           if (PetscRealPart(*aa++) <= 0.) continue;
487           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
497 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
498 {
499   int          ierr;
500   PetscReal    xl,yl,xr,yr,w,h;
501   PetscDraw    draw;
502   PetscTruth   isnull;
503 
504   PetscFunctionBegin;
505 
506   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
507   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
508 
509   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
510   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
511   xr += w;    yr += h;  xl = -w;     yl = -h;
512   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
513   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
514   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatView_SeqSBAIJ"
520 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
521 {
522   int        ierr;
523   PetscTruth issocket,isascii,isbinary,isdraw;
524 
525   PetscFunctionBegin;
526   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
527   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
530   if (issocket) {
531     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
532   } else if (isascii){
533     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
534   } else if (isbinary) {
535     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
536   } else if (isdraw) {
537     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
538   } else {
539     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
547 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
548 {
549   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
550   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
551   int        *ai = a->i,*ailen = a->ilen;
552   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
553   MatScalar  *ap,*aa = a->a,zero = 0.0;
554 
555   PetscFunctionBegin;
556   for (k=0; k<m; k++) { /* loop over rows */
557     row  = im[k]; brow = row/bs;
558     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
559     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
560     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
561     nrow = ailen[brow];
562     for (l=0; l<n; l++) { /* loop over columns */
563       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
564       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
565       col  = in[l] ;
566       bcol = col/bs;
567       cidx = col%bs;
568       ridx = row%bs;
569       high = nrow;
570       low  = 0; /* assume unsorted */
571       while (high-low > 5) {
572         t = (low+high)/2;
573         if (rp[t] > bcol) high = t;
574         else             low  = t;
575       }
576       for (i=low; i<high; i++) {
577         if (rp[i] > bcol) break;
578         if (rp[i] == bcol) {
579           *v++ = ap[bs2*i+bs*cidx+ridx];
580           goto finished;
581         }
582       }
583       *v++ = zero;
584       finished:;
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
593 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
594 {
595   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
596   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
597   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
598   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
599   PetscTruth  roworiented=a->roworiented;
600   MatScalar   *value = v,*ap,*aa = a->a,*bap;
601 
602   PetscFunctionBegin;
603   if (roworiented) {
604     stepval = (n-1)*bs;
605   } else {
606     stepval = (m-1)*bs;
607   }
608   for (k=0; k<m; k++) { /* loop over added rows */
609     row  = im[k];
610     if (row < 0) continue;
611 #if defined(PETSC_USE_BOPT_g)
612     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
613 #endif
614     rp   = aj + ai[row];
615     ap   = aa + bs2*ai[row];
616     rmax = imax[row];
617     nrow = ailen[row];
618     low  = 0;
619     for (l=0; l<n; l++) { /* loop over added columns */
620       if (in[l] < 0) continue;
621 #if defined(PETSC_USE_BOPT_g)
622       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
623 #endif
624       col = in[l];
625       if (col < row) continue; /* ignore lower triangular block */
626       if (roworiented) {
627         value = v + k*(stepval+bs)*bs + l*bs;
628       } else {
629         value = v + l*(stepval+bs)*bs + k*bs;
630       }
631       if (!sorted) low = 0; high = nrow;
632       while (high-low > 7) {
633         t = (low+high)/2;
634         if (rp[t] > col) high = t;
635         else             low  = t;
636       }
637       for (i=low; i<high; i++) {
638         if (rp[i] > col) break;
639         if (rp[i] == col) {
640           bap  = ap +  bs2*i;
641           if (roworiented) {
642             if (is == ADD_VALUES) {
643               for (ii=0; ii<bs; ii++,value+=stepval) {
644                 for (jj=ii; jj<bs2; jj+=bs) {
645                   bap[jj] += *value++;
646                 }
647               }
648             } else {
649               for (ii=0; ii<bs; ii++,value+=stepval) {
650                 for (jj=ii; jj<bs2; jj+=bs) {
651                   bap[jj] = *value++;
652                 }
653               }
654             }
655           } else {
656             if (is == ADD_VALUES) {
657               for (ii=0; ii<bs; ii++,value+=stepval) {
658                 for (jj=0; jj<bs; jj++) {
659                   *bap++ += *value++;
660                 }
661               }
662             } else {
663               for (ii=0; ii<bs; ii++,value+=stepval) {
664                 for (jj=0; jj<bs; jj++) {
665                   *bap++  = *value++;
666                 }
667               }
668             }
669           }
670           goto noinsert2;
671         }
672       }
673       if (nonew == 1) goto noinsert2;
674       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
675       if (nrow >= rmax) {
676         /* there is no extra room in row, therefore enlarge */
677         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
678         MatScalar *new_a;
679 
680         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
681 
682         /* malloc new storage space */
683         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
684 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
685         new_j   = (int*)(new_a + bs2*new_nz);
686         new_i   = new_j + new_nz;
687 
688         /* copy over old data into new slots */
689         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
690         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
691         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
692         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
693         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
694         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
695         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
696         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
697         /* free up old matrix storage */
698         ierr = PetscFree(a->a);CHKERRQ(ierr);
699         if (!a->singlemalloc) {
700           ierr = PetscFree(a->i);CHKERRQ(ierr);
701           ierr = PetscFree(a->j);CHKERRQ(ierr);
702         }
703         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
704         a->singlemalloc = PETSC_TRUE;
705 
706         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
707         rmax = imax[row] = imax[row] + CHUNKSIZE;
708         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
709         a->s_maxnz += bs2*CHUNKSIZE;
710         a->reallocs++;
711         a->s_nz++;
712       }
713       N = nrow++ - 1;
714       /* shift up all the later entries in this row */
715       for (ii=N; ii>=i; ii--) {
716         rp[ii+1] = rp[ii];
717         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
718       }
719       if (N >= i) {
720         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
721       }
722       rp[i] = col;
723       bap   = ap +  bs2*i;
724       if (roworiented) {
725         for (ii=0; ii<bs; ii++,value+=stepval) {
726           for (jj=ii; jj<bs2; jj+=bs) {
727             bap[jj] = *value++;
728           }
729         }
730       } else {
731         for (ii=0; ii<bs; ii++,value+=stepval) {
732           for (jj=0; jj<bs; jj++) {
733             *bap++  = *value++;
734           }
735         }
736       }
737       noinsert2:;
738       low = i;
739     }
740     ailen[row] = nrow;
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
747 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
748 {
749   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
750   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
751   int        m = A->m,*ip,N,*ailen = a->ilen;
752   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
753   MatScalar  *aa = a->a,*ap;
754 #if defined(PETSC_HAVE_SPOOLES)
755   PetscTruth   flag;
756 #endif
757 
758   PetscFunctionBegin;
759   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
760 
761   if (m) rmax = ailen[0];
762   for (i=1; i<mbs; i++) {
763     /* move each row back by the amount of empty slots (fshift) before it*/
764     fshift += imax[i-1] - ailen[i-1];
765     rmax   = PetscMax(rmax,ailen[i]);
766     if (fshift) {
767       ip = aj + ai[i]; ap = aa + bs2*ai[i];
768       N = ailen[i];
769       for (j=0; j<N; j++) {
770         ip[j-fshift] = ip[j];
771         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
772       }
773     }
774     ai[i] = ai[i-1] + ailen[i-1];
775   }
776   if (mbs) {
777     fshift += imax[mbs-1] - ailen[mbs-1];
778     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
779   }
780   /* reset ilen and imax for each row */
781   for (i=0; i<mbs; i++) {
782     ailen[i] = imax[i] = ai[i+1] - ai[i];
783   }
784   a->s_nz = ai[mbs];
785 
786   /* diagonals may have moved, reset it */
787   if (a->diag) {
788     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
789   }
790   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
791            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
793            a->reallocs);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
795   a->reallocs          = 0;
796   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
797 
798 #if defined(PETSC_HAVE_SPOOLES)
799   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
800   if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); }
801 #endif
802 
803   PetscFunctionReturn(0);
804 }
805 
806 /*
807    This function returns an array of flags which indicate the locations of contiguous
808    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
809    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
810    Assume: sizes should be long enough to hold all the values.
811 */
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
814 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
815 {
816   int        i,j,k,row;
817   PetscTruth flg;
818 
819   PetscFunctionBegin;
820   for (i=0,j=0; i<n; j++) {
821     row = idx[i];
822     if (row%bs!=0) { /* Not the begining of a block */
823       sizes[j] = 1;
824       i++;
825     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
826       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
827       i++;
828     } else { /* Begining of the block, so check if the complete block exists */
829       flg = PETSC_TRUE;
830       for (k=1; k<bs; k++) {
831         if (row+k != idx[i+k]) { /* break in the block */
832           flg = PETSC_FALSE;
833           break;
834         }
835       }
836       if (flg == PETSC_TRUE) { /* No break in the bs */
837         sizes[j] = bs;
838         i+= bs;
839       } else {
840         sizes[j] = 1;
841         i++;
842       }
843     }
844   }
845   *bs_max = j;
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
851 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
852 {
853   PetscFunctionBegin;
854   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
855 }
856 
857 /* Only add/insert a(i,j) with i<=j (blocks).
858    Any a(i,j) with i>j input by user is ingored.
859 */
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
863 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
864 {
865   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
866   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
867   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
868   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
869   int         ridx,cidx,bs2=a->bs2,ierr;
870   MatScalar   *ap,value,*aa=a->a,*bap;
871 
872   PetscFunctionBegin;
873 
874   for (k=0; k<m; k++) { /* loop over added rows */
875     row  = im[k];       /* row number */
876     brow = row/bs;      /* block row number */
877     if (row < 0) continue;
878 #if defined(PETSC_USE_BOPT_g)
879     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
880 #endif
881     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
882     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
883     rmax = imax[brow];  /* maximum space allocated for this row */
884     nrow = ailen[brow]; /* actual length of this row */
885     low  = 0;
886 
887     for (l=0; l<n; l++) { /* loop over added columns */
888       if (in[l] < 0) continue;
889 #if defined(PETSC_USE_BOPT_g)
890       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
891 #endif
892       col = in[l];
893       bcol = col/bs;              /* block col number */
894 
895       if (brow <= bcol){
896         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
897         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
898           /* element value a(k,l) */
899           if (roworiented) {
900             value = v[l + k*n];
901           } else {
902             value = v[k + l*m];
903           }
904 
905           /* move pointer bap to a(k,l) quickly and add/insert value */
906           if (!sorted) low = 0; high = nrow;
907           while (high-low > 7) {
908             t = (low+high)/2;
909             if (rp[t] > bcol) high = t;
910             else              low  = t;
911           }
912           for (i=low; i<high; i++) {
913             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
914             if (rp[i] > bcol) break;
915             if (rp[i] == bcol) {
916               bap  = ap +  bs2*i + bs*cidx + ridx;
917               if (is == ADD_VALUES) *bap += value;
918               else                  *bap  = value;
919               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
920               if (brow == bcol && ridx < cidx){
921                 bap  = ap +  bs2*i + bs*ridx + cidx;
922                 if (is == ADD_VALUES) *bap += value;
923                 else                  *bap  = value;
924               }
925               goto noinsert1;
926             }
927           }
928 
929       if (nonew == 1) goto noinsert1;
930       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
931       if (nrow >= rmax) {
932         /* there is no extra room in row, therefore enlarge */
933         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
934         MatScalar *new_a;
935 
936         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
937 
938         /* Malloc new storage space */
939         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
940         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
941         new_j = (int*)(new_a + bs2*new_nz);
942         new_i = new_j + new_nz;
943 
944         /* copy over old data into new slots */
945         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
946         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
947         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
948         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
949         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
950         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
951         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
952         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
953         /* free up old matrix storage */
954         ierr = PetscFree(a->a);CHKERRQ(ierr);
955         if (!a->singlemalloc) {
956           ierr = PetscFree(a->i);CHKERRQ(ierr);
957           ierr = PetscFree(a->j);CHKERRQ(ierr);
958         }
959         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
960         a->singlemalloc = PETSC_TRUE;
961 
962         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
963         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
964         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
965         a->s_maxnz += bs2*CHUNKSIZE;
966         a->reallocs++;
967         a->s_nz++;
968       }
969 
970       N = nrow++ - 1;
971       /* shift up all the later entries in this row */
972       for (ii=N; ii>=i; ii--) {
973         rp[ii+1] = rp[ii];
974         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
975       }
976       if (N>=i) {
977         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
978       }
979       rp[i]                      = bcol;
980       ap[bs2*i + bs*cidx + ridx] = value;
981       noinsert1:;
982       low = i;
983       /* } */
984         }
985       } /* end of if .. if..  */
986     }                     /* end of loop over added columns */
987     ailen[brow] = nrow;
988   }                       /* end of loop over added rows */
989 
990   PetscFunctionReturn(0);
991 }
992 
993 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
994 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
995 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
996 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
997 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
998 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
999 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1000 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
1001 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1002 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1003 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1004 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1005 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1006 extern int MatZeroEntries_SeqSBAIJ(Mat);
1007 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1008 
1009 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1024 
1025 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1033 
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1042 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1043 
1044 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1052 
1053 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1061 
1062 #ifdef HAVE_MatICCFactor
1063 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1066 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
1067 {
1068   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1069   Mat         outA;
1070   int         ierr;
1071   PetscTruth  row_identity,col_identity;
1072 
1073   PetscFunctionBegin;
1074   /*
1075   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1076   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1077   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1078   if (!row_identity || !col_identity) {
1079     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1080   }
1081   */
1082 
1083   outA          = inA;
1084   inA->factor   = FACTOR_CHOLESKY;
1085 
1086   if (!a->diag) {
1087     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1088   }
1089   /*
1090       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1091       for ILU(0) factorization with natural ordering
1092   */
1093   switch (a->bs) {
1094   case 1:
1095     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1096     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1097     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1098   case 2:
1099     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1100     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1101     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1102     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1103     break;
1104   case 3:
1105     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1106     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1107     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1108     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1109     break;
1110   case 4:
1111     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1112     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1113     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1114     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1115     break;
1116   case 5:
1117     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1118     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1119     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1120     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1121     break;
1122   case 6:
1123     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1124     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1125     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1126     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1127     break;
1128   case 7:
1129     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1130     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1132     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1133     break;
1134   default:
1135     a->row        = row;
1136     a->icol       = col;
1137     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1138     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1139 
1140     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1141     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1142     PetscLogObjectParent(inA,a->icol);
1143 
1144     if (!a->solve_work) {
1145       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1146       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1147     }
1148   }
1149 
1150   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1151 
1152   PetscFunctionReturn(0);
1153 }
1154 #endif
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1158 int MatPrintHelp_SeqSBAIJ(Mat A)
1159 {
1160   static PetscTruth called = PETSC_FALSE;
1161   MPI_Comm          comm = A->comm;
1162   int               ierr;
1163 
1164   PetscFunctionBegin;
1165   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1166   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1167   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1174 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1175 {
1176   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1177   int         i,nz,n;
1178 
1179   PetscFunctionBegin;
1180   nz = baij->s_maxnz;
1181   n  = mat->n;
1182   for (i=0; i<nz; i++) {
1183     baij->j[i] = indices[i];
1184   }
1185   baij->s_nz = nz;
1186   for (i=0; i<n; i++) {
1187     baij->ilen[i] = baij->imax[i];
1188   }
1189 
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1196 /*@
1197     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1198        in the matrix.
1199 
1200   Input Parameters:
1201 +  mat     - the SeqSBAIJ matrix
1202 -  indices - the column indices
1203 
1204   Level: advanced
1205 
1206   Notes:
1207     This can be called if you have precomputed the nonzero structure of the
1208   matrix and want to provide it to the matrix object to improve the performance
1209   of the MatSetValues() operation.
1210 
1211     You MUST have set the correct numbers of nonzeros per row in the call to
1212   MatCreateSeqSBAIJ().
1213 
1214     MUST be called before any calls to MatSetValues()
1215 
1216 .seealso: MatCreateSeqSBAIJ
1217 @*/
1218 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1219 {
1220   int ierr,(*f)(Mat,int *);
1221 
1222   PetscFunctionBegin;
1223   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1224   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1225   if (f) {
1226     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1227   } else {
1228     SETERRQ(1,"Wrong type of matrix to set column indices");
1229   }
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1235 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1236 {
1237   int        ierr;
1238 
1239   PetscFunctionBegin;
1240   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1246 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array)
1247 {
1248   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1249   PetscFunctionBegin;
1250   *array = a->a;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1256 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array)
1257 {
1258   PetscFunctionBegin;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #include "petscblaslapack.h"
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1265 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1266 {
1267   int          ierr,one=1;
1268   Mat_SeqSBAIJ *x  = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data;
1269 
1270   PetscFunctionBegin;
1271   if (str == SAME_NONZERO_PATTERN) {
1272     BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one);
1273   } else {
1274     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* -------------------------------------------------------------------*/
1280 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1281        MatGetRow_SeqSBAIJ,
1282        MatRestoreRow_SeqSBAIJ,
1283        MatMult_SeqSBAIJ_N,
1284        MatMultAdd_SeqSBAIJ_N,
1285        MatMultTranspose_SeqSBAIJ,
1286        MatMultTransposeAdd_SeqSBAIJ,
1287        MatSolve_SeqSBAIJ_N,
1288        0,
1289        0,
1290        0,
1291        0,
1292        MatCholeskyFactor_SeqSBAIJ,
1293        MatRelax_SeqSBAIJ,
1294        MatTranspose_SeqSBAIJ,
1295        MatGetInfo_SeqSBAIJ,
1296        MatEqual_SeqSBAIJ,
1297        MatGetDiagonal_SeqSBAIJ,
1298        MatDiagonalScale_SeqSBAIJ,
1299        MatNorm_SeqSBAIJ,
1300        0,
1301        MatAssemblyEnd_SeqSBAIJ,
1302        0,
1303        MatSetOption_SeqSBAIJ,
1304        MatZeroEntries_SeqSBAIJ,
1305        MatZeroRows_SeqSBAIJ,
1306        0,
1307        0,
1308        MatCholeskyFactorSymbolic_SeqSBAIJ,
1309        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1310        MatSetUpPreallocation_SeqSBAIJ,
1311        0,
1312        MatICCFactorSymbolic_SeqSBAIJ,
1313        MatGetArray_SeqSBAIJ,
1314        MatRestoreArray_SeqSBAIJ,
1315        MatDuplicate_SeqSBAIJ,
1316        0,
1317        0,
1318        0,
1319        0,
1320        MatAXPY_SeqSBAIJ,
1321        MatGetSubMatrices_SeqSBAIJ,
1322        MatIncreaseOverlap_SeqSBAIJ,
1323        MatGetValues_SeqSBAIJ,
1324        0,
1325        MatPrintHelp_SeqSBAIJ,
1326        MatScale_SeqSBAIJ,
1327        0,
1328        0,
1329        0,
1330        MatGetBlockSize_SeqSBAIJ,
1331        MatGetRowIJ_SeqSBAIJ,
1332        MatRestoreRowIJ_SeqSBAIJ,
1333        0,
1334        0,
1335        0,
1336        0,
1337        0,
1338        0,
1339        MatSetValuesBlocked_SeqSBAIJ,
1340        MatGetSubMatrix_SeqSBAIJ,
1341        0,
1342        0,
1343        MatGetPetscMaps_Petsc,
1344        0,
1345        0,
1346        0,
1347        0,
1348        0,
1349        0,
1350        MatGetRowMax_SeqSBAIJ,
1351        0,
1352        0,
1353        0,
1354        0,
1355        0,
1356        0,
1357        0,
1358        0,
1359        0,
1360        0,
1361        0,
1362        0,
1363        0,
1364 #if !defined(PETSC_USE_COMPLEX)
1365        MatGetInertia_SeqSBAIJ
1366 #else
1367        0
1368 #endif
1369 };
1370 
1371 EXTERN_C_BEGIN
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1374 int MatStoreValues_SeqSBAIJ(Mat mat)
1375 {
1376   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1377   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1378   int         ierr;
1379 
1380   PetscFunctionBegin;
1381   if (aij->nonew != 1) {
1382     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1383   }
1384 
1385   /* allocate space for values if not already there */
1386   if (!aij->saved_values) {
1387     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1388   }
1389 
1390   /* copy values over */
1391   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 EXTERN_C_END
1395 
1396 EXTERN_C_BEGIN
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1399 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1400 {
1401   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1402   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1403 
1404   PetscFunctionBegin;
1405   if (aij->nonew != 1) {
1406     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1407   }
1408   if (!aij->saved_values) {
1409     SETERRQ(1,"Must call MatStoreValues(A);first");
1410   }
1411 
1412   /* copy values over */
1413   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 EXTERN_C_END
1417 
1418 EXTERN_C_BEGIN
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1421 int MatCreate_SeqSBAIJ(Mat B)
1422 {
1423   Mat_SeqSBAIJ *b;
1424   int          ierr,size;
1425 
1426   PetscFunctionBegin;
1427   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1428   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1429   B->m = B->M = PetscMax(B->m,B->M);
1430   B->n = B->N = PetscMax(B->n,B->N);
1431 
1432   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1433   B->data = (void*)b;
1434   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1435   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1436   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1437   B->ops->view        = MatView_SeqSBAIJ;
1438   B->factor           = 0;
1439   B->lupivotthreshold = 1.0;
1440   B->mapping          = 0;
1441   b->row              = 0;
1442   b->icol             = 0;
1443   b->reallocs         = 0;
1444   b->saved_values     = 0;
1445 
1446   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1447   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1448 
1449   b->sorted           = PETSC_FALSE;
1450   b->roworiented      = PETSC_TRUE;
1451   b->nonew            = 0;
1452   b->diag             = 0;
1453   b->solve_work       = 0;
1454   b->mult_work        = 0;
1455   B->spptr            = 0;
1456   b->keepzeroedrows   = PETSC_FALSE;
1457 
1458   b->inew             = 0;
1459   b->jnew             = 0;
1460   b->anew             = 0;
1461   b->a2anew           = 0;
1462   b->permute          = PETSC_FALSE;
1463 
1464   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1465                                      "MatStoreValues_SeqSBAIJ",
1466                                      (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1468                                      "MatRetrieveValues_SeqSBAIJ",
1469                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1471                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1472                                      (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 EXTERN_C_END
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1479 /*@C
1480    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1481    compressed row) format.  For good matrix assembly performance the
1482    user should preallocate the matrix storage by setting the parameter nz
1483    (or the array nnz).  By setting these parameters accurately, performance
1484    during matrix assembly can be increased by more than a factor of 50.
1485 
1486    Collective on Mat
1487 
1488    Input Parameters:
1489 +  A - the symmetric matrix
1490 .  bs - size of block
1491 .  nz - number of block nonzeros per block row (same for all rows)
1492 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1493          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1494 
1495    Options Database Keys:
1496 .   -mat_no_unroll - uses code that does not unroll the loops in the
1497                      block calculations (much slower)
1498 .    -mat_block_size - size of the blocks to use
1499 
1500    Level: intermediate
1501 
1502    Notes:
1503    Specify the preallocated storage with either nz or nnz (not both).
1504    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1505    allocation.  For additional details, see the users manual chapter on
1506    matrices.
1507 
1508 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1509 @*/
1510 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1511 {
1512   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1513   int          i,len,ierr,mbs,bs2;
1514   PetscTruth   flg;
1515   int          s_nz;
1516 
1517   PetscFunctionBegin;
1518   B->preallocated = PETSC_TRUE;
1519   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1520   mbs  = B->m/bs;
1521   bs2  = bs*bs;
1522 
1523   if (mbs*bs != B->m) {
1524     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1525   }
1526 
1527   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1528   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1529   if (nnz) {
1530     for (i=0; i<mbs; i++) {
1531       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1532       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1533     }
1534   }
1535 
1536   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1537   if (!flg) {
1538     switch (bs) {
1539     case 1:
1540       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1541       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1542       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1543       B->ops->mult            = MatMult_SeqSBAIJ_1;
1544       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1545       break;
1546     case 2:
1547       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1548       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1549       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1550       B->ops->mult            = MatMult_SeqSBAIJ_2;
1551       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1552       break;
1553     case 3:
1554       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1555       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1556       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1557       B->ops->mult            = MatMult_SeqSBAIJ_3;
1558       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1559       break;
1560     case 4:
1561       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1562       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1563       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1564       B->ops->mult            = MatMult_SeqSBAIJ_4;
1565       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1566       break;
1567     case 5:
1568       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1569       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1570       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1571       B->ops->mult            = MatMult_SeqSBAIJ_5;
1572       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1573       break;
1574     case 6:
1575       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1576       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1577       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1578       B->ops->mult            = MatMult_SeqSBAIJ_6;
1579       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1580       break;
1581     case 7:
1582       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1583       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1584       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1585       B->ops->mult            = MatMult_SeqSBAIJ_7;
1586       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1587       break;
1588     }
1589   }
1590 
1591   b->mbs = mbs;
1592   b->nbs = mbs;
1593   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1594   if (!nnz) {
1595     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1596     else if (nz <= 0)        nz = 1;
1597     for (i=0; i<mbs; i++) {
1598       b->imax[i] = nz;
1599     }
1600     nz = nz*mbs; /* total nz */
1601   } else {
1602     nz = 0;
1603     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1604   }
1605   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1606   s_nz = nz;
1607 
1608   /* allocate the matrix space */
1609   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1610   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1611   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1612   b->j = (int*)(b->a + s_nz*bs2);
1613   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1614   b->i = b->j + s_nz;
1615   b->singlemalloc = PETSC_TRUE;
1616 
1617   /* pointer to beginning of each row */
1618   b->i[0] = 0;
1619   for (i=1; i<mbs+1; i++) {
1620     b->i[i] = b->i[i-1] + b->imax[i-1];
1621   }
1622 
1623   /* b->ilen will count nonzeros in each block row so far. */
1624   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1625   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1626   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1627 
1628   b->bs               = bs;
1629   b->bs2              = bs2;
1630   b->s_nz             = 0;
1631   b->s_maxnz          = s_nz*bs2;
1632 
1633   b->inew             = 0;
1634   b->jnew             = 0;
1635   b->anew             = 0;
1636   b->a2anew           = 0;
1637   b->permute          = PETSC_FALSE;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatCreateSeqSBAIJ"
1644 /*@C
1645    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1646    compressed row) format.  For good matrix assembly performance the
1647    user should preallocate the matrix storage by setting the parameter nz
1648    (or the array nnz).  By setting these parameters accurately, performance
1649    during matrix assembly can be increased by more than a factor of 50.
1650 
1651    Collective on MPI_Comm
1652 
1653    Input Parameters:
1654 +  comm - MPI communicator, set to PETSC_COMM_SELF
1655 .  bs - size of block
1656 .  m - number of rows, or number of columns
1657 .  nz - number of block nonzeros per block row (same for all rows)
1658 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1659          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1660 
1661    Output Parameter:
1662 .  A - the symmetric matrix
1663 
1664    Options Database Keys:
1665 .   -mat_no_unroll - uses code that does not unroll the loops in the
1666                      block calculations (much slower)
1667 .    -mat_block_size - size of the blocks to use
1668 
1669    Level: intermediate
1670 
1671    Notes:
1672 
1673    Specify the preallocated storage with either nz or nnz (not both).
1674    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1675    allocation.  For additional details, see the users manual chapter on
1676    matrices.
1677 
1678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1679 @*/
1680 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1681 {
1682   int ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1686   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1687   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1693 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1694 {
1695   Mat         C;
1696   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1697   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1698 
1699   PetscFunctionBegin;
1700   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1701 
1702   *B = 0;
1703   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1704   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1705   c    = (Mat_SeqSBAIJ*)C->data;
1706 
1707   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1708   C->preallocated   = PETSC_TRUE;
1709   C->factor         = A->factor;
1710   c->row            = 0;
1711   c->icol           = 0;
1712   c->saved_values   = 0;
1713   c->keepzeroedrows = a->keepzeroedrows;
1714   C->assembled      = PETSC_TRUE;
1715 
1716   c->bs         = a->bs;
1717   c->bs2        = a->bs2;
1718   c->mbs        = a->mbs;
1719   c->nbs        = a->nbs;
1720 
1721   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1722   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1723   for (i=0; i<mbs; i++) {
1724     c->imax[i] = a->imax[i];
1725     c->ilen[i] = a->ilen[i];
1726   }
1727 
1728   /* allocate the matrix space */
1729   c->singlemalloc = PETSC_TRUE;
1730   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1731   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1732   c->j = (int*)(c->a + nz*bs2);
1733   c->i = c->j + nz;
1734   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1735   if (mbs > 0) {
1736     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1737     if (cpvalues == MAT_COPY_VALUES) {
1738       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1739     } else {
1740       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1741     }
1742   }
1743 
1744   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1745   c->sorted      = a->sorted;
1746   c->roworiented = a->roworiented;
1747   c->nonew       = a->nonew;
1748 
1749   if (a->diag) {
1750     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1751     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1752     for (i=0; i<mbs; i++) {
1753       c->diag[i] = a->diag[i];
1754     }
1755   } else c->diag        = 0;
1756   c->s_nz               = a->s_nz;
1757   c->s_maxnz            = a->s_maxnz;
1758   c->solve_work         = 0;
1759   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1760   c->mult_work          = 0;
1761   *B = C;
1762   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 EXTERN_C_BEGIN
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1769 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1770 {
1771   Mat_SeqSBAIJ *a;
1772   Mat          B;
1773   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1774   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1775   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1776   int          *masked,nmask,tmp,bs2,ishift;
1777   PetscScalar  *aa;
1778   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1779 
1780   PetscFunctionBegin;
1781   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1782   bs2  = bs*bs;
1783 
1784   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1785   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1786   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1787   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1788   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1789   M = header[1]; N = header[2]; nz = header[3];
1790 
1791   if (header[3] < 0) {
1792     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1793   }
1794 
1795   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1796 
1797   /*
1798      This code adds extra rows to make sure the number of rows is
1799     divisible by the blocksize
1800   */
1801   mbs        = M/bs;
1802   extra_rows = bs - M + bs*(mbs);
1803   if (extra_rows == bs) extra_rows = 0;
1804   else                  mbs++;
1805   if (extra_rows) {
1806     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1807   }
1808 
1809   /* read in row lengths */
1810   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1811   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1812   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1813 
1814   /* read in column indices */
1815   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1816   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1817   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1818 
1819   /* loop over row lengths determining block row lengths */
1820   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1821   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1822   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1823   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1824   masked   = mask + mbs;
1825   rowcount = 0; nzcount = 0;
1826   for (i=0; i<mbs; i++) {
1827     nmask = 0;
1828     for (j=0; j<bs; j++) {
1829       kmax = rowlengths[rowcount];
1830       for (k=0; k<kmax; k++) {
1831         tmp = jj[nzcount++]/bs;   /* block col. index */
1832         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1833       }
1834       rowcount++;
1835     }
1836     s_browlengths[i] += nmask;
1837 
1838     /* zero out the mask elements we set */
1839     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1840   }
1841 
1842   /* create our matrix */
1843   ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr);
1844   B = *A;
1845   a = (Mat_SeqSBAIJ*)B->data;
1846 
1847   /* set matrix "i" values */
1848   a->i[0] = 0;
1849   for (i=1; i<= mbs; i++) {
1850     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1851     a->ilen[i-1] = s_browlengths[i-1];
1852   }
1853   a->s_nz = a->i[mbs];
1854 
1855   /* read in nonzero values */
1856   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1857   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1858   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1859 
1860   /* set "a" and "j" values into matrix */
1861   nzcount = 0; jcount = 0;
1862   for (i=0; i<mbs; i++) {
1863     nzcountb = nzcount;
1864     nmask    = 0;
1865     for (j=0; j<bs; j++) {
1866       kmax = rowlengths[i*bs+j];
1867       for (k=0; k<kmax; k++) {
1868         tmp = jj[nzcount++]/bs; /* block col. index */
1869         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1870       }
1871     }
1872     /* sort the masked values */
1873     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1874 
1875     /* set "j" values into matrix */
1876     maskcount = 1;
1877     for (j=0; j<nmask; j++) {
1878       a->j[jcount++]  = masked[j];
1879       mask[masked[j]] = maskcount++;
1880     }
1881 
1882     /* set "a" values into matrix */
1883     ishift = bs2*a->i[i];
1884     for (j=0; j<bs; j++) {
1885       kmax = rowlengths[i*bs+j];
1886       for (k=0; k<kmax; k++) {
1887         tmp       = jj[nzcountb]/bs ; /* block col. index */
1888         if (tmp >= i){
1889           block     = mask[tmp] - 1;
1890           point     = jj[nzcountb] - bs*tmp;
1891           idx       = ishift + bs2*block + j + bs*point;
1892           a->a[idx] = aa[nzcountb];
1893         }
1894         nzcountb++;
1895       }
1896     }
1897     /* zero out the mask elements we set */
1898     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1899   }
1900   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1901 
1902   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1903   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1904   ierr = PetscFree(aa);CHKERRQ(ierr);
1905   ierr = PetscFree(jj);CHKERRQ(ierr);
1906   ierr = PetscFree(mask);CHKERRQ(ierr);
1907 
1908   B->assembled = PETSC_TRUE;
1909   ierr = MatView_Private(B);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 EXTERN_C_END
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1916 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1917 {
1918   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1919   MatScalar    *aa=a->a,*v,*v1;
1920   PetscScalar  *x,*b,*t,sum,d;
1921   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1922   int          nz,nz1,*vj,*vj1,i;
1923 
1924   PetscFunctionBegin;
1925   its = its*lits;
1926   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1927 
1928   if (bs > 1)
1929     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1930 
1931   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1932   if (xx != bb) {
1933     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
1934   } else {
1935     b = x;
1936   }
1937 
1938   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
1939 
1940   if (flag & SOR_ZERO_INITIAL_GUESS) {
1941     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1942       for (i=0; i<m; i++)
1943         t[i] = b[i];
1944 
1945       for (i=0; i<m; i++){
1946         d  = *(aa + ai[i]);  /* diag[i] */
1947         v  = aa + ai[i] + 1;
1948         vj = aj + ai[i] + 1;
1949         nz = ai[i+1] - ai[i] - 1;
1950         x[i] = omega*t[i]/d;
1951         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1952         PetscLogFlops(2*nz-1);
1953       }
1954     }
1955 
1956     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1957       for (i=0; i<m; i++)
1958         t[i] = b[i];
1959 
1960       for (i=0; i<m-1; i++){  /* update rhs */
1961         v  = aa + ai[i] + 1;
1962         vj = aj + ai[i] + 1;
1963         nz = ai[i+1] - ai[i] - 1;
1964         while (nz--) t[*vj++] -= x[i]*(*v++);
1965         PetscLogFlops(2*nz-1);
1966       }
1967       for (i=m-1; i>=0; i--){
1968         d  = *(aa + ai[i]);
1969         v  = aa + ai[i] + 1;
1970         vj = aj + ai[i] + 1;
1971         nz = ai[i+1] - ai[i] - 1;
1972         sum = t[i];
1973         while (nz--) sum -= x[*vj++]*(*v++);
1974         PetscLogFlops(2*nz-1);
1975         x[i] =   (1-omega)*x[i] + omega*sum/d;
1976       }
1977     }
1978     its--;
1979   }
1980 
1981   while (its--) {
1982     /*
1983        forward sweep:
1984        for i=0,...,m-1:
1985          sum[i] = (b[i] - U(i,:)x )/d[i];
1986          x[i]   = (1-omega)x[i] + omega*sum[i];
1987          b      = b - x[i]*U^T(i,:);
1988 
1989     */
1990     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1991       for (i=0; i<m; i++)
1992         t[i] = b[i];
1993 
1994       for (i=0; i<m; i++){
1995         d  = *(aa + ai[i]);  /* diag[i] */
1996         v  = aa + ai[i] + 1; v1=v;
1997         vj = aj + ai[i] + 1; vj1=vj;
1998         nz = ai[i+1] - ai[i] - 1; nz1=nz;
1999         sum = t[i];
2000         while (nz1--) sum -= (*v1++)*x[*vj1++];
2001         x[i] = (1-omega)*x[i] + omega*sum/d;
2002         while (nz--) t[*vj++] -= x[i]*(*v++);
2003         PetscLogFlops(4*nz-2);
2004       }
2005     }
2006 
2007   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2008       /*
2009        backward sweep:
2010        b = b - x[i]*U^T(i,:), i=0,...,n-2
2011        for i=m-1,...,0:
2012          sum[i] = (b[i] - U(i,:)x )/d[i];
2013          x[i]   = (1-omega)x[i] + omega*sum[i];
2014       */
2015       for (i=0; i<m; i++)
2016         t[i] = b[i];
2017 
2018       for (i=0; i<m-1; i++){  /* update rhs */
2019         v  = aa + ai[i] + 1;
2020         vj = aj + ai[i] + 1;
2021         nz = ai[i+1] - ai[i] - 1;
2022         while (nz--) t[*vj++] -= x[i]*(*v++);
2023         PetscLogFlops(2*nz-1);
2024       }
2025       for (i=m-1; i>=0; i--){
2026         d  = *(aa + ai[i]);
2027         v  = aa + ai[i] + 1;
2028         vj = aj + ai[i] + 1;
2029         nz = ai[i+1] - ai[i] - 1;
2030         sum = t[i];
2031         while (nz--) sum -= x[*vj++]*(*v++);
2032         PetscLogFlops(2*nz-1);
2033         x[i] =   (1-omega)*x[i] + omega*sum/d;
2034       }
2035     }
2036   }
2037 
2038   ierr = PetscFree(t); CHKERRQ(ierr);
2039   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2040   if (bb != xx) {
2041     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2042   }
2043 
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 
2048 
2049 
2050 
2051 
2052