xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision efcf0fc353b705cc10d937cc67ff6a314bf622fa)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_SYMMETRIC:
183   case MAT_STRUCTURALLY_SYMMETRIC:
184     break;
185   default:
186     SETERRQ(PETSC_ERR_SUP,"unknown option");
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
193 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
194 {
195   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
196   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
197   MatScalar    *aa,*aa_i;
198   PetscScalar  *v_i;
199 
200   PetscFunctionBegin;
201   bs  = a->bs;
202   ai  = a->i;
203   aj  = a->j;
204   aa  = a->a;
205   bs2 = a->bs2;
206 
207   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
208 
209   bn  = row/bs;   /* Block number */
210   bp  = row % bs; /* Block position */
211   M   = ai[bn+1] - ai[bn];
212   *ncols = bs*M;
213 
214   if (v) {
215     *v = 0;
216     if (*ncols) {
217       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
218       for (i=0; i<M; i++) { /* for each block in the block row */
219         v_i  = *v + i*bs;
220         aa_i = aa + bs2*(ai[bn] + i);
221         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
222       }
223     }
224   }
225 
226   if (cols) {
227     *cols = 0;
228     if (*ncols) {
229       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
230       for (i=0; i<M; i++) { /* for each block in the block row */
231         cols_i = *cols + i*bs;
232         itmp  = bs*aj[ai[bn] + i];
233         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
234       }
235     }
236   }
237 
238   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
239   /* this segment is currently removed, so only entries in the upper triangle are obtained */
240 #ifdef column_search
241   v_i    = *v    + M*bs;
242   cols_i = *cols + M*bs;
243   for (i=0; i<bn; i++){ /* for each block row */
244     M = ai[i+1] - ai[i];
245     for (j=0; j<M; j++){
246       itmp = aj[ai[i] + j];    /* block column value */
247       if (itmp == bn){
248         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
249         for (k=0; k<bs; k++) {
250           *cols_i++ = i*bs+k;
251           *v_i++    = aa_i[k];
252         }
253         *ncols += bs;
254         break;
255       }
256     }
257   }
258 #endif
259 
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
265 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
266 {
267   int ierr;
268 
269   PetscFunctionBegin;
270   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
271   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
277 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
278 {
279   int ierr;
280   PetscFunctionBegin;
281   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
287 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
288 {
289   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
290   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
291   PetscScalar  *aa;
292   FILE         *file;
293 
294   PetscFunctionBegin;
295   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
296   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
297   col_lens[0] = MAT_FILE_COOKIE;
298 
299   col_lens[1] = A->m;
300   col_lens[2] = A->m;
301   col_lens[3] = a->s_nz*bs2;
302 
303   /* store lengths of each row and write (including header) to file */
304   count = 0;
305   for (i=0; i<a->mbs; i++) {
306     for (j=0; j<bs; j++) {
307       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
308     }
309   }
310   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
311   ierr = PetscFree(col_lens);CHKERRQ(ierr);
312 
313   /* store column indices (zero start index) */
314   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
315   count = 0;
316   for (i=0; i<a->mbs; i++) {
317     for (j=0; j<bs; j++) {
318       for (k=a->i[i]; k<a->i[i+1]; k++) {
319         for (l=0; l<bs; l++) {
320           jj[count++] = bs*a->j[k] + l;
321         }
322       }
323     }
324   }
325   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
326   ierr = PetscFree(jj);CHKERRQ(ierr);
327 
328   /* store nonzero values */
329   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
330   count = 0;
331   for (i=0; i<a->mbs; i++) {
332     for (j=0; j<bs; j++) {
333       for (k=a->i[i]; k<a->i[i+1]; k++) {
334         for (l=0; l<bs; l++) {
335           aa[count++] = a->a[bs2*k + l*bs + j];
336         }
337       }
338     }
339   }
340   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
341   ierr = PetscFree(aa);CHKERRQ(ierr);
342 
343   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
344   if (file) {
345     fprintf(file,"-matload_block_size %d\n",a->bs);
346   }
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
352 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
353 {
354   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
355   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
356   char              *name;
357   PetscViewerFormat format;
358 
359   PetscFunctionBegin;
360   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
361   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
362   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
363     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
364   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
365     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
366   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
367     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
368     for (i=0; i<a->mbs; i++) {
369       for (j=0; j<bs; j++) {
370         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
371         for (k=a->i[i]; k<a->i[i+1]; k++) {
372           for (l=0; l<bs; l++) {
373 #if defined(PETSC_USE_COMPLEX)
374             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
375               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
376                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
377             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
378               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
379                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
380             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
381               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
382             }
383 #else
384             if (a->a[bs2*k + l*bs + j] != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
386             }
387 #endif
388           }
389         }
390         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
391       }
392     }
393     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
394   } else {
395     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
396     for (i=0; i<a->mbs; i++) {
397       for (j=0; j<bs; j++) {
398         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
399         for (k=a->i[i]; k<a->i[i+1]; k++) {
400           for (l=0; l<bs; l++) {
401 #if defined(PETSC_USE_COMPLEX)
402             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
403               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
404                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
405             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
406               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
407                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
408             } else {
409               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
410             }
411 #else
412             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
413 #endif
414           }
415         }
416         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
417       }
418     }
419     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
420   }
421   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
427 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
428 {
429   Mat          A = (Mat) Aa;
430   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
431   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
432   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
433   MatScalar    *aa;
434   MPI_Comm     comm;
435   PetscViewer  viewer;
436 
437   PetscFunctionBegin;
438   /*
439       This is nasty. If this is called from an originally parallel matrix
440    then all processes call this,but only the first has the matrix so the
441    rest should return immediately.
442   */
443   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
444   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
445   if (rank) PetscFunctionReturn(0);
446 
447   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
448 
449   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
450   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
451 
452   /* loop over matrix elements drawing boxes */
453   color = PETSC_DRAW_BLUE;
454   for (i=0,row=0; i<mbs; i++,row+=bs) {
455     for (j=a->i[i]; j<a->i[i+1]; j++) {
456       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
457       x_l = a->j[j]*bs; x_r = x_l + 1.0;
458       aa = a->a + j*bs2;
459       for (k=0; k<bs; k++) {
460         for (l=0; l<bs; l++) {
461           if (PetscRealPart(*aa++) >=  0.) continue;
462           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
463         }
464       }
465     }
466   }
467   color = PETSC_DRAW_CYAN;
468   for (i=0,row=0; i<mbs; i++,row+=bs) {
469     for (j=a->i[i]; j<a->i[i+1]; j++) {
470       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
471       x_l = a->j[j]*bs; x_r = x_l + 1.0;
472       aa = a->a + j*bs2;
473       for (k=0; k<bs; k++) {
474         for (l=0; l<bs; l++) {
475           if (PetscRealPart(*aa++) != 0.) continue;
476           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
477         }
478       }
479     }
480   }
481 
482   color = PETSC_DRAW_RED;
483   for (i=0,row=0; i<mbs; i++,row+=bs) {
484     for (j=a->i[i]; j<a->i[i+1]; j++) {
485       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
486       x_l = a->j[j]*bs; x_r = x_l + 1.0;
487       aa = a->a + j*bs2;
488       for (k=0; k<bs; k++) {
489         for (l=0; l<bs; l++) {
490           if (PetscRealPart(*aa++) <= 0.) continue;
491           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
492         }
493       }
494     }
495   }
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
501 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
502 {
503   int          ierr;
504   PetscReal    xl,yl,xr,yr,w,h;
505   PetscDraw    draw;
506   PetscTruth   isnull;
507 
508   PetscFunctionBegin;
509 
510   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
511   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
512 
513   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
514   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
515   xr += w;    yr += h;  xl = -w;     yl = -h;
516   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
517   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
518   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatView_SeqSBAIJ"
524 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
525 {
526   int        ierr;
527   PetscTruth issocket,isascii,isbinary,isdraw;
528 
529   PetscFunctionBegin;
530   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
531   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
532   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
533   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
534   if (issocket) {
535     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
536   } else if (isascii){
537     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
538   } else if (isbinary) {
539     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
540   } else if (isdraw) {
541     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
542   } else {
543     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
544   }
545   PetscFunctionReturn(0);
546 }
547 
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
551 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
552 {
553   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
554   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
555   int        *ai = a->i,*ailen = a->ilen;
556   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
557   MatScalar  *ap,*aa = a->a,zero = 0.0;
558 
559   PetscFunctionBegin;
560   for (k=0; k<m; k++) { /* loop over rows */
561     row  = im[k]; brow = row/bs;
562     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
563     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
564     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
565     nrow = ailen[brow];
566     for (l=0; l<n; l++) { /* loop over columns */
567       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
568       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
569       col  = in[l] ;
570       bcol = col/bs;
571       cidx = col%bs;
572       ridx = row%bs;
573       high = nrow;
574       low  = 0; /* assume unsorted */
575       while (high-low > 5) {
576         t = (low+high)/2;
577         if (rp[t] > bcol) high = t;
578         else             low  = t;
579       }
580       for (i=low; i<high; i++) {
581         if (rp[i] > bcol) break;
582         if (rp[i] == bcol) {
583           *v++ = ap[bs2*i+bs*cidx+ridx];
584           goto finished;
585         }
586       }
587       *v++ = zero;
588       finished:;
589     }
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
598 {
599   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
600   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
601   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
602   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
603   PetscTruth      roworiented=a->roworiented;
604   const MatScalar *value = v;
605   MatScalar       *ap,*aa = a->a,*bap;
606 
607   PetscFunctionBegin;
608   if (roworiented) {
609     stepval = (n-1)*bs;
610   } else {
611     stepval = (m-1)*bs;
612   }
613   for (k=0; k<m; k++) { /* loop over added rows */
614     row  = im[k];
615     if (row < 0) continue;
616 #if defined(PETSC_USE_BOPT_g)
617     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
618 #endif
619     rp   = aj + ai[row];
620     ap   = aa + bs2*ai[row];
621     rmax = imax[row];
622     nrow = ailen[row];
623     low  = 0;
624     for (l=0; l<n; l++) { /* loop over added columns */
625       if (in[l] < 0) continue;
626       col = in[l];
627 #if defined(PETSC_USE_BOPT_g)
628       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
629 #endif
630       if (col < row) continue; /* ignore lower triangular block */
631       if (roworiented) {
632         value = v + k*(stepval+bs)*bs + l*bs;
633       } else {
634         value = v + l*(stepval+bs)*bs + k*bs;
635       }
636       if (!sorted) low = 0; high = nrow;
637       while (high-low > 7) {
638         t = (low+high)/2;
639         if (rp[t] > col) high = t;
640         else             low  = t;
641       }
642       for (i=low; i<high; i++) {
643         if (rp[i] > col) break;
644         if (rp[i] == col) {
645           bap  = ap +  bs2*i;
646           if (roworiented) {
647             if (is == ADD_VALUES) {
648               for (ii=0; ii<bs; ii++,value+=stepval) {
649                 for (jj=ii; jj<bs2; jj+=bs) {
650                   bap[jj] += *value++;
651                 }
652               }
653             } else {
654               for (ii=0; ii<bs; ii++,value+=stepval) {
655                 for (jj=ii; jj<bs2; jj+=bs) {
656                   bap[jj] = *value++;
657                 }
658               }
659             }
660           } else {
661             if (is == ADD_VALUES) {
662               for (ii=0; ii<bs; ii++,value+=stepval) {
663                 for (jj=0; jj<bs; jj++) {
664                   *bap++ += *value++;
665                 }
666               }
667             } else {
668               for (ii=0; ii<bs; ii++,value+=stepval) {
669                 for (jj=0; jj<bs; jj++) {
670                   *bap++  = *value++;
671                 }
672               }
673             }
674           }
675           goto noinsert2;
676         }
677       }
678       if (nonew == 1) goto noinsert2;
679       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
680       if (nrow >= rmax) {
681         /* there is no extra room in row, therefore enlarge */
682         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
683         MatScalar *new_a;
684 
685         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
686 
687         /* malloc new storage space */
688         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
689 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
690         new_j   = (int*)(new_a + bs2*new_nz);
691         new_i   = new_j + new_nz;
692 
693         /* copy over old data into new slots */
694         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
695         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
696         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
697         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
698         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
699         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
700         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
701         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
702         /* free up old matrix storage */
703         ierr = PetscFree(a->a);CHKERRQ(ierr);
704         if (!a->singlemalloc) {
705           ierr = PetscFree(a->i);CHKERRQ(ierr);
706           ierr = PetscFree(a->j);CHKERRQ(ierr);
707         }
708         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
709         a->singlemalloc = PETSC_TRUE;
710 
711         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
712         rmax = imax[row] = imax[row] + CHUNKSIZE;
713         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
714         a->s_maxnz += bs2*CHUNKSIZE;
715         a->reallocs++;
716         a->s_nz++;
717       }
718       N = nrow++ - 1;
719       /* shift up all the later entries in this row */
720       for (ii=N; ii>=i; ii--) {
721         rp[ii+1] = rp[ii];
722         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
723       }
724       if (N >= i) {
725         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
726       }
727       rp[i] = col;
728       bap   = ap +  bs2*i;
729       if (roworiented) {
730         for (ii=0; ii<bs; ii++,value+=stepval) {
731           for (jj=ii; jj<bs2; jj+=bs) {
732             bap[jj] = *value++;
733           }
734         }
735       } else {
736         for (ii=0; ii<bs; ii++,value+=stepval) {
737           for (jj=0; jj<bs; jj++) {
738             *bap++  = *value++;
739           }
740         }
741       }
742       noinsert2:;
743       low = i;
744     }
745     ailen[row] = nrow;
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
753 {
754   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
755   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
756   int        m = A->m,*ip,N,*ailen = a->ilen;
757   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
758   MatScalar  *aa = a->a,*ap;
759 
760   PetscFunctionBegin;
761   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
762 
763   if (m) rmax = ailen[0];
764   for (i=1; i<mbs; i++) {
765     /* move each row back by the amount of empty slots (fshift) before it*/
766     fshift += imax[i-1] - ailen[i-1];
767     rmax   = PetscMax(rmax,ailen[i]);
768     if (fshift) {
769       ip = aj + ai[i]; ap = aa + bs2*ai[i];
770       N = ailen[i];
771       for (j=0; j<N; j++) {
772         ip[j-fshift] = ip[j];
773         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
774       }
775     }
776     ai[i] = ai[i-1] + ailen[i-1];
777   }
778   if (mbs) {
779     fshift += imax[mbs-1] - ailen[mbs-1];
780     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
781   }
782   /* reset ilen and imax for each row */
783   for (i=0; i<mbs; i++) {
784     ailen[i] = imax[i] = ai[i+1] - ai[i];
785   }
786   a->s_nz = ai[mbs];
787 
788   /* diagonals may have moved, reset it */
789   if (a->diag) {
790     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
791   }
792   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
793            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
794   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
795            a->reallocs);
796   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
797   a->reallocs          = 0;
798   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
799 
800   PetscFunctionReturn(0);
801 }
802 
803 /*
804    This function returns an array of flags which indicate the locations of contiguous
805    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
806    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
807    Assume: sizes should be long enough to hold all the values.
808 */
809 #undef __FUNCT__
810 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
811 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
812 {
813   int        i,j,k,row;
814   PetscTruth flg;
815 
816   PetscFunctionBegin;
817   for (i=0,j=0; i<n; j++) {
818     row = idx[i];
819     if (row%bs!=0) { /* Not the begining of a block */
820       sizes[j] = 1;
821       i++;
822     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
823       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
824       i++;
825     } else { /* Begining of the block, so check if the complete block exists */
826       flg = PETSC_TRUE;
827       for (k=1; k<bs; k++) {
828         if (row+k != idx[i+k]) { /* break in the block */
829           flg = PETSC_FALSE;
830           break;
831         }
832       }
833       if (flg == PETSC_TRUE) { /* No break in the bs */
834         sizes[j] = bs;
835         i+= bs;
836       } else {
837         sizes[j] = 1;
838         i++;
839       }
840     }
841   }
842   *bs_max = j;
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
848 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
849 {
850   PetscFunctionBegin;
851   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
852 }
853 
854 /* Only add/insert a(i,j) with i<=j (blocks).
855    Any a(i,j) with i>j input by user is ingored.
856 */
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
860 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
861 {
862   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
863   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
864   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
865   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
866   int         ridx,cidx,bs2=a->bs2,ierr;
867   MatScalar   *ap,value,*aa=a->a,*bap;
868 
869   PetscFunctionBegin;
870 
871   for (k=0; k<m; k++) { /* loop over added rows */
872     row  = im[k];       /* row number */
873     brow = row/bs;      /* block row number */
874     if (row < 0) continue;
875 #if defined(PETSC_USE_BOPT_g)
876     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
877 #endif
878     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
879     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
880     rmax = imax[brow];  /* maximum space allocated for this row */
881     nrow = ailen[brow]; /* actual length of this row */
882     low  = 0;
883 
884     for (l=0; l<n; l++) { /* loop over added columns */
885       if (in[l] < 0) continue;
886 #if defined(PETSC_USE_BOPT_g)
887       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
888 #endif
889       col = in[l];
890       bcol = col/bs;              /* block col number */
891 
892       if (brow <= bcol){
893         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
894         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
895           /* element value a(k,l) */
896           if (roworiented) {
897             value = v[l + k*n];
898           } else {
899             value = v[k + l*m];
900           }
901 
902           /* move pointer bap to a(k,l) quickly and add/insert value */
903           if (!sorted) low = 0; high = nrow;
904           while (high-low > 7) {
905             t = (low+high)/2;
906             if (rp[t] > bcol) high = t;
907             else              low  = t;
908           }
909           for (i=low; i<high; i++) {
910             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
911             if (rp[i] > bcol) break;
912             if (rp[i] == bcol) {
913               bap  = ap +  bs2*i + bs*cidx + ridx;
914               if (is == ADD_VALUES) *bap += value;
915               else                  *bap  = value;
916               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
917               if (brow == bcol && ridx < cidx){
918                 bap  = ap +  bs2*i + bs*ridx + cidx;
919                 if (is == ADD_VALUES) *bap += value;
920                 else                  *bap  = value;
921               }
922               goto noinsert1;
923             }
924           }
925 
926       if (nonew == 1) goto noinsert1;
927       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
928       if (nrow >= rmax) {
929         /* there is no extra room in row, therefore enlarge */
930         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
931         MatScalar *new_a;
932 
933         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
934 
935         /* Malloc new storage space */
936         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
937         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
938         new_j = (int*)(new_a + bs2*new_nz);
939         new_i = new_j + new_nz;
940 
941         /* copy over old data into new slots */
942         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
943         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
944         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
945         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
946         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
947         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
948         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
949         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
950         /* free up old matrix storage */
951         ierr = PetscFree(a->a);CHKERRQ(ierr);
952         if (!a->singlemalloc) {
953           ierr = PetscFree(a->i);CHKERRQ(ierr);
954           ierr = PetscFree(a->j);CHKERRQ(ierr);
955         }
956         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
957         a->singlemalloc = PETSC_TRUE;
958 
959         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
960         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
961         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
962         a->s_maxnz += bs2*CHUNKSIZE;
963         a->reallocs++;
964         a->s_nz++;
965       }
966 
967       N = nrow++ - 1;
968       /* shift up all the later entries in this row */
969       for (ii=N; ii>=i; ii--) {
970         rp[ii+1] = rp[ii];
971         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
972       }
973       if (N>=i) {
974         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
975       }
976       rp[i]                      = bcol;
977       ap[bs2*i + bs*cidx + ridx] = value;
978       noinsert1:;
979       low = i;
980       /* } */
981         }
982       } /* end of if .. if..  */
983     }                     /* end of loop over added columns */
984     ailen[brow] = nrow;
985   }                       /* end of loop over added rows */
986 
987   PetscFunctionReturn(0);
988 }
989 
990 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
991 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
992 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
993 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
994 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
995 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
996 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
997 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
998 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
999 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1000 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1001 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1002 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1003 extern int MatZeroEntries_SeqSBAIJ(Mat);
1004 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1005 
1006 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1007 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1008 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1009 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1010 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1011 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1012 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1013 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1014 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1015 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1016 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1017 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1018 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1019 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1020 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1021 
1022 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1023 
1024 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1025 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1026 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1027 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1028 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1029 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1030 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1031 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1032 
1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1041 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1042 
1043 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1044 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1045 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1046 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1047 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1048 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1049 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1050 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1051 
1052 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1053 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1054 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1055 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1056 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1057 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1058 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1059 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1060 
1061 #ifdef HAVE_MatICCFactor
1062 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1065 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1066 {
1067   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1068   Mat         outA;
1069   int         ierr;
1070   PetscTruth  row_identity,col_identity;
1071 
1072   PetscFunctionBegin;
1073   /*
1074   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1075   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1076   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1077   if (!row_identity || !col_identity) {
1078     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1079   }
1080   */
1081 
1082   outA          = inA;
1083   inA->factor   = FACTOR_CHOLESKY;
1084 
1085   if (!a->diag) {
1086     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1087   }
1088   /*
1089       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1090       for ILU(0) factorization with natural ordering
1091   */
1092   switch (a->bs) {
1093   case 1:
1094     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1095     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1096     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1097     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1098   case 2:
1099     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1100     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1101     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1102     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1103     break;
1104   case 3:
1105     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1106     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1107     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1108     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1109     break;
1110   case 4:
1111     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1112     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1113     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1114     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1115     break;
1116   case 5:
1117     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1118     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1119     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1120     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1121     break;
1122   case 6:
1123     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1124     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1125     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1126     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1127     break;
1128   case 7:
1129     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1130     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1132     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1133     break;
1134   default:
1135     a->row        = row;
1136     a->icol       = col;
1137     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1138     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1139 
1140     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1141     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1142     PetscLogObjectParent(inA,a->icol);
1143 
1144     if (!a->solve_work) {
1145       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1146       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1147     }
1148   }
1149 
1150   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1151 
1152   PetscFunctionReturn(0);
1153 }
1154 #endif
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1158 int MatPrintHelp_SeqSBAIJ(Mat A)
1159 {
1160   static PetscTruth called = PETSC_FALSE;
1161   MPI_Comm          comm = A->comm;
1162   int               ierr;
1163 
1164   PetscFunctionBegin;
1165   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1166   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1167   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1174 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1175 {
1176   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1177   int         i,nz,n;
1178 
1179   PetscFunctionBegin;
1180   nz = baij->s_maxnz;
1181   n  = mat->n;
1182   for (i=0; i<nz; i++) {
1183     baij->j[i] = indices[i];
1184   }
1185   baij->s_nz = nz;
1186   for (i=0; i<n; i++) {
1187     baij->ilen[i] = baij->imax[i];
1188   }
1189 
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1196 /*@
1197     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1198        in the matrix.
1199 
1200   Input Parameters:
1201 +  mat     - the SeqSBAIJ matrix
1202 -  indices - the column indices
1203 
1204   Level: advanced
1205 
1206   Notes:
1207     This can be called if you have precomputed the nonzero structure of the
1208   matrix and want to provide it to the matrix object to improve the performance
1209   of the MatSetValues() operation.
1210 
1211     You MUST have set the correct numbers of nonzeros per row in the call to
1212   MatCreateSeqSBAIJ().
1213 
1214     MUST be called before any calls to MatSetValues()
1215 
1216 .seealso: MatCreateSeqSBAIJ
1217 @*/
1218 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1219 {
1220   int ierr,(*f)(Mat,int *);
1221 
1222   PetscFunctionBegin;
1223   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1224   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1225   if (f) {
1226     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1227   } else {
1228     SETERRQ(1,"Wrong type of matrix to set column indices");
1229   }
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1235 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1236 {
1237   int        ierr;
1238 
1239   PetscFunctionBegin;
1240   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1246 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1247 {
1248   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1249   PetscFunctionBegin;
1250   *array = a->a;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1256 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1257 {
1258   PetscFunctionBegin;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #include "petscblaslapack.h"
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1265 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1266 {
1267   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1268   int          ierr,one=1,i,bs=y->bs,bs2,j;
1269 
1270   PetscFunctionBegin;
1271   if (str == SAME_NONZERO_PATTERN) {
1272     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1273   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1274     if (y->xtoy && y->XtoY != X) {
1275       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1276       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1277     }
1278     if (!y->xtoy) { /* get xtoy */
1279       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1280       y->XtoY = X;
1281     }
1282     bs2 = bs*bs;
1283     for (i=0; i<x->s_nz; i++) {
1284       j = 0;
1285       while (j < bs2){
1286         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1287         j++;
1288       }
1289     }
1290     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1291   } else {
1292     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1299 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1300 {
1301   PetscFunctionBegin;
1302   *flg = PETSC_TRUE;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1308 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1309 {
1310   PetscFunctionBegin;
1311   *flg = PETSC_TRUE;
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1317 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1318 {
1319   PetscFunctionBegin;
1320   *flg = PETSC_FALSE;
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /* -------------------------------------------------------------------*/
1325 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1326        MatGetRow_SeqSBAIJ,
1327        MatRestoreRow_SeqSBAIJ,
1328        MatMult_SeqSBAIJ_N,
1329 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1330        MatMultTranspose_SeqSBAIJ,
1331        MatMultTransposeAdd_SeqSBAIJ,
1332        MatSolve_SeqSBAIJ_N,
1333        0,
1334        0,
1335 /*10*/ 0,
1336        0,
1337        MatCholeskyFactor_SeqSBAIJ,
1338        MatRelax_SeqSBAIJ,
1339        MatTranspose_SeqSBAIJ,
1340 /*15*/ MatGetInfo_SeqSBAIJ,
1341        MatEqual_SeqSBAIJ,
1342        MatGetDiagonal_SeqSBAIJ,
1343        MatDiagonalScale_SeqSBAIJ,
1344        MatNorm_SeqSBAIJ,
1345 /*20*/ 0,
1346        MatAssemblyEnd_SeqSBAIJ,
1347        0,
1348        MatSetOption_SeqSBAIJ,
1349        MatZeroEntries_SeqSBAIJ,
1350 /*25*/ MatZeroRows_SeqSBAIJ,
1351        0,
1352        0,
1353        MatCholeskyFactorSymbolic_SeqSBAIJ,
1354        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1355 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1356        0,
1357        MatICCFactorSymbolic_SeqSBAIJ,
1358        MatGetArray_SeqSBAIJ,
1359        MatRestoreArray_SeqSBAIJ,
1360 /*35*/ MatDuplicate_SeqSBAIJ,
1361        0,
1362        0,
1363        0,
1364        0,
1365 /*40*/ MatAXPY_SeqSBAIJ,
1366        MatGetSubMatrices_SeqSBAIJ,
1367        MatIncreaseOverlap_SeqSBAIJ,
1368        MatGetValues_SeqSBAIJ,
1369        0,
1370 /*45*/ MatPrintHelp_SeqSBAIJ,
1371        MatScale_SeqSBAIJ,
1372        0,
1373        0,
1374        0,
1375 /*50*/ MatGetBlockSize_SeqSBAIJ,
1376        MatGetRowIJ_SeqSBAIJ,
1377        MatRestoreRowIJ_SeqSBAIJ,
1378        0,
1379        0,
1380 /*55*/ 0,
1381        0,
1382        0,
1383        0,
1384        MatSetValuesBlocked_SeqSBAIJ,
1385 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1386        0,
1387        0,
1388        MatGetPetscMaps_Petsc,
1389        0,
1390 /*65*/ 0,
1391        0,
1392        0,
1393        0,
1394        0,
1395 /*70*/ MatGetRowMax_SeqSBAIJ,
1396        0,
1397        0,
1398        0,
1399        0,
1400 /*75*/ 0,
1401        0,
1402        0,
1403        0,
1404        0,
1405 /*80*/ 0,
1406        0,
1407        0,
1408        0,
1409 #if !defined(PETSC_USE_COMPLEX)
1410        MatGetInertia_SeqSBAIJ,
1411 #else
1412        0,
1413 #endif
1414 /*85*/ MatLoad_SeqSBAIJ,
1415        MatIsSymmetric_SeqSBAIJ,
1416        MatIsStructurallySymmetric_SeqSBAIJ,
1417        MatIsHermitian_SeqSBAIJ
1418 };
1419 
1420 EXTERN_C_BEGIN
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1423 int MatStoreValues_SeqSBAIJ(Mat mat)
1424 {
1425   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1426   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1427   int         ierr;
1428 
1429   PetscFunctionBegin;
1430   if (aij->nonew != 1) {
1431     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1432   }
1433 
1434   /* allocate space for values if not already there */
1435   if (!aij->saved_values) {
1436     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1437   }
1438 
1439   /* copy values over */
1440   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 EXTERN_C_END
1444 
1445 EXTERN_C_BEGIN
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1448 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1449 {
1450   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1451   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1452 
1453   PetscFunctionBegin;
1454   if (aij->nonew != 1) {
1455     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1456   }
1457   if (!aij->saved_values) {
1458     SETERRQ(1,"Must call MatStoreValues(A);first");
1459   }
1460 
1461   /* copy values over */
1462   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 EXTERN_C_END
1466 
1467 EXTERN_C_BEGIN
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1470 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1471 {
1472   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1473   int          i,len,ierr,mbs,bs2;
1474   PetscTruth   flg;
1475   int          s_nz;
1476 
1477   PetscFunctionBegin;
1478   B->preallocated = PETSC_TRUE;
1479   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1480   mbs  = B->m/bs;
1481   bs2  = bs*bs;
1482 
1483   if (mbs*bs != B->m) {
1484     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1485   }
1486 
1487   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1488   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1489   if (nnz) {
1490     for (i=0; i<mbs; i++) {
1491       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1492       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1493     }
1494   }
1495 
1496   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1497   if (!flg) {
1498     switch (bs) {
1499     case 1:
1500       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1501       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1502       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1503       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1504       B->ops->mult            = MatMult_SeqSBAIJ_1;
1505       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1506       break;
1507     case 2:
1508       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1509       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1510       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1511       B->ops->mult            = MatMult_SeqSBAIJ_2;
1512       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1513       break;
1514     case 3:
1515       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1516       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1517       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1518       B->ops->mult            = MatMult_SeqSBAIJ_3;
1519       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1520       break;
1521     case 4:
1522       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1523       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1524       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1525       B->ops->mult            = MatMult_SeqSBAIJ_4;
1526       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1527       break;
1528     case 5:
1529       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1530       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1531       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1532       B->ops->mult            = MatMult_SeqSBAIJ_5;
1533       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1534       break;
1535     case 6:
1536       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1537       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1538       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1539       B->ops->mult            = MatMult_SeqSBAIJ_6;
1540       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1541       break;
1542     case 7:
1543       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1544       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1545       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1546       B->ops->mult            = MatMult_SeqSBAIJ_7;
1547       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1548       break;
1549     }
1550   }
1551 
1552   b->mbs = mbs;
1553   b->nbs = mbs;
1554   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1555   if (!nnz) {
1556     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1557     else if (nz <= 0)        nz = 1;
1558     for (i=0; i<mbs; i++) {
1559       b->imax[i] = nz;
1560     }
1561     nz = nz*mbs; /* total nz */
1562   } else {
1563     nz = 0;
1564     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1565   }
1566   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1567   s_nz = nz;
1568 
1569   /* allocate the matrix space */
1570   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1571   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1572   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1573   b->j = (int*)(b->a + s_nz*bs2);
1574   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1575   b->i = b->j + s_nz;
1576   b->singlemalloc = PETSC_TRUE;
1577 
1578   /* pointer to beginning of each row */
1579   b->i[0] = 0;
1580   for (i=1; i<mbs+1; i++) {
1581     b->i[i] = b->i[i-1] + b->imax[i-1];
1582   }
1583 
1584   /* b->ilen will count nonzeros in each block row so far. */
1585   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1586   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1587   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1588 
1589   b->bs               = bs;
1590   b->bs2              = bs2;
1591   b->s_nz             = 0;
1592   b->s_maxnz          = s_nz*bs2;
1593 
1594   b->inew             = 0;
1595   b->jnew             = 0;
1596   b->anew             = 0;
1597   b->a2anew           = 0;
1598   b->permute          = PETSC_FALSE;
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
1602 
1603 /*MC
1604    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1605    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1606 
1607    Options Database Keys:
1608 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1609 
1610   Level: beginner
1611 
1612 .seealso: MatCreateSeqSBAIJ
1613 M*/
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1618 int MatCreate_SeqSBAIJ(Mat B)
1619 {
1620   Mat_SeqSBAIJ *b;
1621   int          ierr,size;
1622 
1623   PetscFunctionBegin;
1624   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1625   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1626   B->m = B->M = PetscMax(B->m,B->M);
1627   B->n = B->N = PetscMax(B->n,B->N);
1628 
1629   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1630   B->data = (void*)b;
1631   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1632   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1633   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1634   B->ops->view        = MatView_SeqSBAIJ;
1635   B->factor           = 0;
1636   B->lupivotthreshold = 1.0;
1637   B->mapping          = 0;
1638   b->row              = 0;
1639   b->icol             = 0;
1640   b->reallocs         = 0;
1641   b->saved_values     = 0;
1642 
1643   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1644   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1645 
1646   b->sorted           = PETSC_FALSE;
1647   b->roworiented      = PETSC_TRUE;
1648   b->nonew            = 0;
1649   b->diag             = 0;
1650   b->solve_work       = 0;
1651   b->mult_work        = 0;
1652   B->spptr            = 0;
1653   b->keepzeroedrows   = PETSC_FALSE;
1654   b->xtoy             = 0;
1655   b->XtoY             = 0;
1656 
1657   b->inew             = 0;
1658   b->jnew             = 0;
1659   b->anew             = 0;
1660   b->a2anew           = 0;
1661   b->permute          = PETSC_FALSE;
1662 
1663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1664                                      "MatStoreValues_SeqSBAIJ",
1665                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1667                                      "MatRetrieveValues_SeqSBAIJ",
1668                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1670                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1671                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1673                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1674                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1681 /*@C
1682    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1683    compressed row) format.  For good matrix assembly performance the
1684    user should preallocate the matrix storage by setting the parameter nz
1685    (or the array nnz).  By setting these parameters accurately, performance
1686    during matrix assembly can be increased by more than a factor of 50.
1687 
1688    Collective on Mat
1689 
1690    Input Parameters:
1691 +  A - the symmetric matrix
1692 .  bs - size of block
1693 .  nz - number of block nonzeros per block row (same for all rows)
1694 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1695          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1696 
1697    Options Database Keys:
1698 .   -mat_no_unroll - uses code that does not unroll the loops in the
1699                      block calculations (much slower)
1700 .    -mat_block_size - size of the blocks to use
1701 
1702    Level: intermediate
1703 
1704    Notes:
1705    Specify the preallocated storage with either nz or nnz (not both).
1706    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1707    allocation.  For additional details, see the users manual chapter on
1708    matrices.
1709 
1710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1711 @*/
1712 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1713   int ierr,(*f)(Mat,int,int,const int[]);
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1717   if (f) {
1718     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatCreateSeqSBAIJ"
1725 /*@C
1726    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1727    compressed row) format.  For good matrix assembly performance the
1728    user should preallocate the matrix storage by setting the parameter nz
1729    (or the array nnz).  By setting these parameters accurately, performance
1730    during matrix assembly can be increased by more than a factor of 50.
1731 
1732    Collective on MPI_Comm
1733 
1734    Input Parameters:
1735 +  comm - MPI communicator, set to PETSC_COMM_SELF
1736 .  bs - size of block
1737 .  m - number of rows, or number of columns
1738 .  nz - number of block nonzeros per block row (same for all rows)
1739 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1740          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1741 
1742    Output Parameter:
1743 .  A - the symmetric matrix
1744 
1745    Options Database Keys:
1746 .   -mat_no_unroll - uses code that does not unroll the loops in the
1747                      block calculations (much slower)
1748 .    -mat_block_size - size of the blocks to use
1749 
1750    Level: intermediate
1751 
1752    Notes:
1753 
1754    Specify the preallocated storage with either nz or nnz (not both).
1755    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1756    allocation.  For additional details, see the users manual chapter on
1757    matrices.
1758 
1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1760 @*/
1761 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1762 {
1763   int ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1767   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1768   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1774 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1775 {
1776   Mat         C;
1777   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1778   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1779 
1780   PetscFunctionBegin;
1781   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1782 
1783   *B = 0;
1784   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1785   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1786   c    = (Mat_SeqSBAIJ*)C->data;
1787 
1788   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1789   C->preallocated   = PETSC_TRUE;
1790   C->factor         = A->factor;
1791   c->row            = 0;
1792   c->icol           = 0;
1793   c->saved_values   = 0;
1794   c->keepzeroedrows = a->keepzeroedrows;
1795   C->assembled      = PETSC_TRUE;
1796 
1797   c->bs         = a->bs;
1798   c->bs2        = a->bs2;
1799   c->mbs        = a->mbs;
1800   c->nbs        = a->nbs;
1801 
1802   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1803   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1804   for (i=0; i<mbs; i++) {
1805     c->imax[i] = a->imax[i];
1806     c->ilen[i] = a->ilen[i];
1807   }
1808 
1809   /* allocate the matrix space */
1810   c->singlemalloc = PETSC_TRUE;
1811   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1812   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1813   c->j = (int*)(c->a + nz*bs2);
1814   c->i = c->j + nz;
1815   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1816   if (mbs > 0) {
1817     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1818     if (cpvalues == MAT_COPY_VALUES) {
1819       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1820     } else {
1821       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1822     }
1823   }
1824 
1825   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1826   c->sorted      = a->sorted;
1827   c->roworiented = a->roworiented;
1828   c->nonew       = a->nonew;
1829 
1830   if (a->diag) {
1831     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1832     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1833     for (i=0; i<mbs; i++) {
1834       c->diag[i] = a->diag[i];
1835     }
1836   } else c->diag        = 0;
1837   c->s_nz               = a->s_nz;
1838   c->s_maxnz            = a->s_maxnz;
1839   c->solve_work         = 0;
1840   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1841   c->mult_work          = 0;
1842   *B = C;
1843   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1849 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1850 {
1851   Mat_SeqSBAIJ *a;
1852   Mat          B;
1853   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1854   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1855   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1856   int          *masked,nmask,tmp,bs2,ishift;
1857   PetscScalar  *aa;
1858   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1859 
1860   PetscFunctionBegin;
1861   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1862   bs2  = bs*bs;
1863 
1864   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1865   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1866   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1867   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1868   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1869   M = header[1]; N = header[2]; nz = header[3];
1870 
1871   if (header[3] < 0) {
1872     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1873   }
1874 
1875   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1876 
1877   /*
1878      This code adds extra rows to make sure the number of rows is
1879     divisible by the blocksize
1880   */
1881   mbs        = M/bs;
1882   extra_rows = bs - M + bs*(mbs);
1883   if (extra_rows == bs) extra_rows = 0;
1884   else                  mbs++;
1885   if (extra_rows) {
1886     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1887   }
1888 
1889   /* read in row lengths */
1890   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1891   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1892   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1893 
1894   /* read in column indices */
1895   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1896   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1897   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1898 
1899   /* loop over row lengths determining block row lengths */
1900   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1901   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1902   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1903   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1904   masked   = mask + mbs;
1905   rowcount = 0; nzcount = 0;
1906   for (i=0; i<mbs; i++) {
1907     nmask = 0;
1908     for (j=0; j<bs; j++) {
1909       kmax = rowlengths[rowcount];
1910       for (k=0; k<kmax; k++) {
1911         tmp = jj[nzcount++]/bs;   /* block col. index */
1912         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1913       }
1914       rowcount++;
1915     }
1916     s_browlengths[i] += nmask;
1917 
1918     /* zero out the mask elements we set */
1919     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1920   }
1921 
1922   /* create our matrix */
1923   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1924   ierr = MatSetType(B,type);CHKERRQ(ierr);
1925   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1926   a = (Mat_SeqSBAIJ*)B->data;
1927 
1928   /* set matrix "i" values */
1929   a->i[0] = 0;
1930   for (i=1; i<= mbs; i++) {
1931     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1932     a->ilen[i-1] = s_browlengths[i-1];
1933   }
1934   a->s_nz = a->i[mbs];
1935 
1936   /* read in nonzero values */
1937   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1938   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1939   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1940 
1941   /* set "a" and "j" values into matrix */
1942   nzcount = 0; jcount = 0;
1943   for (i=0; i<mbs; i++) {
1944     nzcountb = nzcount;
1945     nmask    = 0;
1946     for (j=0; j<bs; j++) {
1947       kmax = rowlengths[i*bs+j];
1948       for (k=0; k<kmax; k++) {
1949         tmp = jj[nzcount++]/bs; /* block col. index */
1950         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1951       }
1952     }
1953     /* sort the masked values */
1954     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1955 
1956     /* set "j" values into matrix */
1957     maskcount = 1;
1958     for (j=0; j<nmask; j++) {
1959       a->j[jcount++]  = masked[j];
1960       mask[masked[j]] = maskcount++;
1961     }
1962 
1963     /* set "a" values into matrix */
1964     ishift = bs2*a->i[i];
1965     for (j=0; j<bs; j++) {
1966       kmax = rowlengths[i*bs+j];
1967       for (k=0; k<kmax; k++) {
1968         tmp       = jj[nzcountb]/bs ; /* block col. index */
1969         if (tmp >= i){
1970           block     = mask[tmp] - 1;
1971           point     = jj[nzcountb] - bs*tmp;
1972           idx       = ishift + bs2*block + j + bs*point;
1973           a->a[idx] = aa[nzcountb];
1974         }
1975         nzcountb++;
1976       }
1977     }
1978     /* zero out the mask elements we set */
1979     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1980   }
1981   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1982 
1983   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1984   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1985   ierr = PetscFree(aa);CHKERRQ(ierr);
1986   ierr = PetscFree(jj);CHKERRQ(ierr);
1987   ierr = PetscFree(mask);CHKERRQ(ierr);
1988 
1989   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1990   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1991   ierr = MatView_Private(B);CHKERRQ(ierr);
1992   *A = B;
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatRelax_SeqSBAIJ"
1998 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1999 {
2000   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2001   MatScalar    *aa=a->a,*v,*v1;
2002   PetscScalar  *x,*b,*t,sum,d;
2003   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
2004   int          nz,nz1,*vj,*vj1,i;
2005 
2006   PetscFunctionBegin;
2007   its = its*lits;
2008   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2009 
2010   if (bs > 1)
2011     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2012 
2013   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2014   if (xx != bb) {
2015     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2016   } else {
2017     b = x;
2018   }
2019 
2020   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2021 
2022   if (flag & SOR_ZERO_INITIAL_GUESS) {
2023     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2024       for (i=0; i<m; i++)
2025         t[i] = b[i];
2026 
2027       for (i=0; i<m; i++){
2028         d  = *(aa + ai[i]);  /* diag[i] */
2029         v  = aa + ai[i] + 1;
2030         vj = aj + ai[i] + 1;
2031         nz = ai[i+1] - ai[i] - 1;
2032         x[i] = omega*t[i]/d;
2033         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2034         PetscLogFlops(2*nz-1);
2035       }
2036     }
2037 
2038     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2039       for (i=0; i<m; i++)
2040         t[i] = b[i];
2041 
2042       for (i=0; i<m-1; i++){  /* update rhs */
2043         v  = aa + ai[i] + 1;
2044         vj = aj + ai[i] + 1;
2045         nz = ai[i+1] - ai[i] - 1;
2046         while (nz--) t[*vj++] -= x[i]*(*v++);
2047         PetscLogFlops(2*nz-1);
2048       }
2049       for (i=m-1; i>=0; i--){
2050         d  = *(aa + ai[i]);
2051         v  = aa + ai[i] + 1;
2052         vj = aj + ai[i] + 1;
2053         nz = ai[i+1] - ai[i] - 1;
2054         sum = t[i];
2055         while (nz--) sum -= x[*vj++]*(*v++);
2056         PetscLogFlops(2*nz-1);
2057         x[i] =   (1-omega)*x[i] + omega*sum/d;
2058       }
2059     }
2060     its--;
2061   }
2062 
2063   while (its--) {
2064     /*
2065        forward sweep:
2066        for i=0,...,m-1:
2067          sum[i] = (b[i] - U(i,:)x )/d[i];
2068          x[i]   = (1-omega)x[i] + omega*sum[i];
2069          b      = b - x[i]*U^T(i,:);
2070 
2071     */
2072     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2073       for (i=0; i<m; i++)
2074         t[i] = b[i];
2075 
2076       for (i=0; i<m; i++){
2077         d  = *(aa + ai[i]);  /* diag[i] */
2078         v  = aa + ai[i] + 1; v1=v;
2079         vj = aj + ai[i] + 1; vj1=vj;
2080         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2081         sum = t[i];
2082         while (nz1--) sum -= (*v1++)*x[*vj1++];
2083         x[i] = (1-omega)*x[i] + omega*sum/d;
2084         while (nz--) t[*vj++] -= x[i]*(*v++);
2085         PetscLogFlops(4*nz-2);
2086       }
2087     }
2088 
2089   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2090       /*
2091        backward sweep:
2092        b = b - x[i]*U^T(i,:), i=0,...,n-2
2093        for i=m-1,...,0:
2094          sum[i] = (b[i] - U(i,:)x )/d[i];
2095          x[i]   = (1-omega)x[i] + omega*sum[i];
2096       */
2097       for (i=0; i<m; i++)
2098         t[i] = b[i];
2099 
2100       for (i=0; i<m-1; i++){  /* update rhs */
2101         v  = aa + ai[i] + 1;
2102         vj = aj + ai[i] + 1;
2103         nz = ai[i+1] - ai[i] - 1;
2104         while (nz--) t[*vj++] -= x[i]*(*v++);
2105         PetscLogFlops(2*nz-1);
2106       }
2107       for (i=m-1; i>=0; i--){
2108         d  = *(aa + ai[i]);
2109         v  = aa + ai[i] + 1;
2110         vj = aj + ai[i] + 1;
2111         nz = ai[i+1] - ai[i] - 1;
2112         sum = t[i];
2113         while (nz--) sum -= x[*vj++]*(*v++);
2114         PetscLogFlops(2*nz-1);
2115         x[i] =   (1-omega)*x[i] + omega*sum/d;
2116       }
2117     }
2118   }
2119 
2120   ierr = PetscFree(t); CHKERRQ(ierr);
2121   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2122   if (bb != xx) {
2123     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2124   }
2125 
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 
2130 
2131 
2132 
2133 
2134