xref: /petsc/src/mat/impls/sbaij/seq/sbaij.c (revision 2fc52814d27bf1f4e71021c1c3ebb532b583ed60)
1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
2 
3 /*
4     Defines the basic matrix operations for the SBAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"         /*I "petscmat.h" I*/
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "src/mat/impls/sbaij/seq/sbaij.h"
11 
12 #define CHUNKSIZE  10
13 
14 /*
15      Checks for missing diagonals
16 */
17 #undef __FUNCT__
18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ"
19 int MatMissingDiagonal_SeqSBAIJ(Mat A)
20 {
21   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
22   int         *diag,*jj = a->j,i,ierr;
23 
24   PetscFunctionBegin;
25   ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr);
26   diag = a->diag;
27   for (i=0; i<a->mbs; i++) {
28     if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
29   }
30   PetscFunctionReturn(0);
31 }
32 
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ"
35 int MatMarkDiagonal_SeqSBAIJ(Mat A)
36 {
37   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38   int          i,mbs = a->mbs,ierr;
39 
40   PetscFunctionBegin;
41   if (a->diag) PetscFunctionReturn(0);
42 
43   ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr);
44   PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
45   for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ"
51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
52 {
53   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54   int         n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59 
60   if (oshift == 1) {
61     /* temporarily add 1 to i and j indices */
62     int s_nz = a->i[n];
63     for (i=0; i<s_nz; i++) a->j[i]++;
64     for (i=0; i<n+1; i++) a->i[i]++;
65     *ia = a->i; *ja = a->j;
66   } else {
67     *ia = a->i; *ja = a->j;
68   }
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ"
74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
75 {
76   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77   int         i,n = a->mbs;
78 
79   PetscFunctionBegin;
80   if (!ia) PetscFunctionReturn(0);
81 
82   if (oshift == 1) {
83     int s_nz = a->i[n]-1;
84     for (i=0; i<s_nz; i++) a->j[i]--;
85     for (i=0; i<n+1; i++) a->i[i]--;
86   }
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ"
92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
93 {
94   Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
95 
96   PetscFunctionBegin;
97   *bs = sbaij->bs;
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatDestroy_SeqSBAIJ"
103 int MatDestroy_SeqSBAIJ(Mat A)
104 {
105   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
106   int         ierr;
107 
108   PetscFunctionBegin;
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
111 #endif
112   ierr = PetscFree(a->a);CHKERRQ(ierr);
113   if (!a->singlemalloc) {
114     ierr = PetscFree(a->i);CHKERRQ(ierr);
115     ierr = PetscFree(a->j);CHKERRQ(ierr);
116   }
117   if (a->row) {
118     ierr = ISDestroy(a->row);CHKERRQ(ierr);
119   }
120   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
121   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
122   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
123   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
124   if (a->solve_work)  {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
125   if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);}
126   if (a->mult_work)   {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
127   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
128   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
129 
130   if (a->inew){
131     ierr = PetscFree(a->inew);CHKERRQ(ierr);
132     a->inew = 0;
133   }
134   ierr = PetscFree(a);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSetOption_SeqSBAIJ"
140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
141 {
142   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
143 
144   PetscFunctionBegin;
145   switch (op) {
146   case MAT_ROW_ORIENTED:
147     a->roworiented    = PETSC_TRUE;
148     break;
149   case MAT_COLUMN_ORIENTED:
150     a->roworiented    = PETSC_FALSE;
151     break;
152   case MAT_COLUMNS_SORTED:
153     a->sorted         = PETSC_TRUE;
154     break;
155   case MAT_COLUMNS_UNSORTED:
156     a->sorted         = PETSC_FALSE;
157     break;
158   case MAT_KEEP_ZEROED_ROWS:
159     a->keepzeroedrows = PETSC_TRUE;
160     break;
161   case MAT_NO_NEW_NONZERO_LOCATIONS:
162     a->nonew          = 1;
163     break;
164   case MAT_NEW_NONZERO_LOCATION_ERR:
165     a->nonew          = -1;
166     break;
167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
168     a->nonew          = -2;
169     break;
170   case MAT_YES_NEW_NONZERO_LOCATIONS:
171     a->nonew          = 0;
172     break;
173   case MAT_ROWS_SORTED:
174   case MAT_ROWS_UNSORTED:
175   case MAT_YES_NEW_DIAGONALS:
176   case MAT_IGNORE_OFF_PROC_ENTRIES:
177   case MAT_USE_HASH_TABLE:
178     PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
179     break;
180   case MAT_NO_NEW_DIAGONALS:
181     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
182   case MAT_NOT_SYMMETRIC:
183   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
184   case MAT_HERMITIAN:
185     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
186   case MAT_SYMMETRIC:
187   case MAT_STRUCTURALLY_SYMMETRIC:
188   case MAT_NOT_HERMITIAN:
189   case MAT_SYMMETRY_ETERNAL:
190   case MAT_NOT_SYMMETRY_ETERNAL:
191     break;
192   default:
193     SETERRQ(PETSC_ERR_SUP,"unknown option");
194   }
195   PetscFunctionReturn(0);
196 }
197 
198 #undef __FUNCT__
199 #define __FUNCT__ "MatGetRow_SeqSBAIJ"
200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
201 {
202   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
203   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
204   MatScalar    *aa,*aa_i;
205   PetscScalar  *v_i;
206 
207   PetscFunctionBegin;
208   bs  = a->bs;
209   ai  = a->i;
210   aj  = a->j;
211   aa  = a->a;
212   bs2 = a->bs2;
213 
214   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row);
215 
216   bn  = row/bs;   /* Block number */
217   bp  = row % bs; /* Block position */
218   M   = ai[bn+1] - ai[bn];
219   *ncols = bs*M;
220 
221   if (v) {
222     *v = 0;
223     if (*ncols) {
224       ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr);
225       for (i=0; i<M; i++) { /* for each block in the block row */
226         v_i  = *v + i*bs;
227         aa_i = aa + bs2*(ai[bn] + i);
228         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
229       }
230     }
231   }
232 
233   if (cols) {
234     *cols = 0;
235     if (*ncols) {
236       ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr);
237       for (i=0; i<M; i++) { /* for each block in the block row */
238         cols_i = *cols + i*bs;
239         itmp  = bs*aj[ai[bn] + i];
240         for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
241       }
242     }
243   }
244 
245   /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
246   /* this segment is currently removed, so only entries in the upper triangle are obtained */
247 #ifdef column_search
248   v_i    = *v    + M*bs;
249   cols_i = *cols + M*bs;
250   for (i=0; i<bn; i++){ /* for each block row */
251     M = ai[i+1] - ai[i];
252     for (j=0; j<M; j++){
253       itmp = aj[ai[i] + j];    /* block column value */
254       if (itmp == bn){
255         aa_i   = aa    + bs2*(ai[i] + j) + bs*bp;
256         for (k=0; k<bs; k++) {
257           *cols_i++ = i*bs+k;
258           *v_i++    = aa_i[k];
259         }
260         *ncols += bs;
261         break;
262       }
263     }
264   }
265 #endif
266 
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ"
272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
273 {
274   int ierr;
275 
276   PetscFunctionBegin;
277   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
278   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatTranspose_SeqSBAIJ"
284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
285 {
286   int ierr;
287   PetscFunctionBegin;
288   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatView_SeqSBAIJ_Binary"
294 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
295 {
296   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
297   int          i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
298   PetscScalar  *aa;
299   FILE         *file;
300 
301   PetscFunctionBegin;
302   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
303   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
304   col_lens[0] = MAT_FILE_COOKIE;
305 
306   col_lens[1] = A->m;
307   col_lens[2] = A->m;
308   col_lens[3] = a->s_nz*bs2;
309 
310   /* store lengths of each row and write (including header) to file */
311   count = 0;
312   for (i=0; i<a->mbs; i++) {
313     for (j=0; j<bs; j++) {
314       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
315     }
316   }
317   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
318   ierr = PetscFree(col_lens);CHKERRQ(ierr);
319 
320   /* store column indices (zero start index) */
321   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
322   count = 0;
323   for (i=0; i<a->mbs; i++) {
324     for (j=0; j<bs; j++) {
325       for (k=a->i[i]; k<a->i[i+1]; k++) {
326         for (l=0; l<bs; l++) {
327           jj[count++] = bs*a->j[k] + l;
328         }
329       }
330     }
331   }
332   ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr);
333   ierr = PetscFree(jj);CHKERRQ(ierr);
334 
335   /* store nonzero values */
336   ierr  = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
337   count = 0;
338   for (i=0; i<a->mbs; i++) {
339     for (j=0; j<bs; j++) {
340       for (k=a->i[i]; k<a->i[i+1]; k++) {
341         for (l=0; l<bs; l++) {
342           aa[count++] = a->a[bs2*k + l*bs + j];
343         }
344       }
345     }
346   }
347   ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr);
348   ierr = PetscFree(aa);CHKERRQ(ierr);
349 
350   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
351   if (file) {
352     fprintf(file,"-matload_block_size %d\n",a->bs);
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII"
359 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
360 {
361   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
362   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
363   char              *name;
364   PetscViewerFormat format;
365 
366   PetscFunctionBegin;
367   ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr);
368   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
369   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
370     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
371   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
372     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
373   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
374     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
375     for (i=0; i<a->mbs; i++) {
376       for (j=0; j<bs; j++) {
377         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
378         for (k=a->i[i]; k<a->i[i+1]; k++) {
379           for (l=0; l<bs; l++) {
380 #if defined(PETSC_USE_COMPLEX)
381             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
382               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
383                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
384             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
385               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
386                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
387             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
388               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
389             }
390 #else
391             if (a->a[bs2*k + l*bs + j] != 0.0) {
392               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
393             }
394 #endif
395           }
396         }
397         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
398       }
399     }
400     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
401   } else {
402     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
403     for (i=0; i<a->mbs; i++) {
404       for (j=0; j<bs; j++) {
405         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
406         for (k=a->i[i]; k<a->i[i+1]; k++) {
407           for (l=0; l<bs; l++) {
408 #if defined(PETSC_USE_COMPLEX)
409             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
410               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
411                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
412             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
413               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
414                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
415             } else {
416               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
417             }
418 #else
419             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
420 #endif
421           }
422         }
423         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
424       }
425     }
426     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
427   }
428   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom"
434 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
435 {
436   Mat          A = (Mat) Aa;
437   Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
438   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
439   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
440   MatScalar    *aa;
441   MPI_Comm     comm;
442   PetscViewer  viewer;
443 
444   PetscFunctionBegin;
445   /*
446       This is nasty. If this is called from an originally parallel matrix
447    then all processes call this,but only the first has the matrix so the
448    rest should return immediately.
449   */
450   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
452   if (rank) PetscFunctionReturn(0);
453 
454   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
455 
456   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
457   PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
458 
459   /* loop over matrix elements drawing boxes */
460   color = PETSC_DRAW_BLUE;
461   for (i=0,row=0; i<mbs; i++,row+=bs) {
462     for (j=a->i[i]; j<a->i[i+1]; j++) {
463       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
464       x_l = a->j[j]*bs; x_r = x_l + 1.0;
465       aa = a->a + j*bs2;
466       for (k=0; k<bs; k++) {
467         for (l=0; l<bs; l++) {
468           if (PetscRealPart(*aa++) >=  0.) continue;
469           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
470         }
471       }
472     }
473   }
474   color = PETSC_DRAW_CYAN;
475   for (i=0,row=0; i<mbs; i++,row+=bs) {
476     for (j=a->i[i]; j<a->i[i+1]; j++) {
477       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
478       x_l = a->j[j]*bs; x_r = x_l + 1.0;
479       aa = a->a + j*bs2;
480       for (k=0; k<bs; k++) {
481         for (l=0; l<bs; l++) {
482           if (PetscRealPart(*aa++) != 0.) continue;
483           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488 
489   color = PETSC_DRAW_RED;
490   for (i=0,row=0; i<mbs; i++,row+=bs) {
491     for (j=a->i[i]; j<a->i[i+1]; j++) {
492       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
493       x_l = a->j[j]*bs; x_r = x_l + 1.0;
494       aa = a->a + j*bs2;
495       for (k=0; k<bs; k++) {
496         for (l=0; l<bs; l++) {
497           if (PetscRealPart(*aa++) <= 0.) continue;
498           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
499         }
500       }
501     }
502   }
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatView_SeqSBAIJ_Draw"
508 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
509 {
510   int          ierr;
511   PetscReal    xl,yl,xr,yr,w,h;
512   PetscDraw    draw;
513   PetscTruth   isnull;
514 
515   PetscFunctionBegin;
516 
517   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
518   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
519 
520   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
521   xr  = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
522   xr += w;    yr += h;  xl = -w;     yl = -h;
523   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
524   ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
525   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatView_SeqSBAIJ"
531 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
532 {
533   int        ierr;
534   PetscTruth issocket,isascii,isbinary,isdraw;
535 
536   PetscFunctionBegin;
537   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
538   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
539   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
540   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
541   if (issocket) {
542     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
543   } else if (isascii){
544     ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
545   } else if (isbinary) {
546     ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr);
547   } else if (isdraw) {
548     ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr);
549   } else {
550     SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name);
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatGetValues_SeqSBAIJ"
558 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
559 {
560   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
561   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
562   int        *ai = a->i,*ailen = a->ilen;
563   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
564   MatScalar  *ap,*aa = a->a,zero = 0.0;
565 
566   PetscFunctionBegin;
567   for (k=0; k<m; k++) { /* loop over rows */
568     row  = im[k]; brow = row/bs;
569     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
570     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
571     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
572     nrow = ailen[brow];
573     for (l=0; l<n; l++) { /* loop over columns */
574       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]);
575       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
576       col  = in[l] ;
577       bcol = col/bs;
578       cidx = col%bs;
579       ridx = row%bs;
580       high = nrow;
581       low  = 0; /* assume unsorted */
582       while (high-low > 5) {
583         t = (low+high)/2;
584         if (rp[t] > bcol) high = t;
585         else             low  = t;
586       }
587       for (i=low; i<high; i++) {
588         if (rp[i] > bcol) break;
589         if (rp[i] == bcol) {
590           *v++ = ap[bs2*i+bs*cidx+ridx];
591           goto finished;
592         }
593       }
594       *v++ = zero;
595       finished:;
596     }
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ"
604 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
605 {
606   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
607   int             *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
608   int             *imax=a->imax,*ai=a->i,*ailen=a->ilen;
609   int             *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
610   PetscTruth      roworiented=a->roworiented;
611   const MatScalar *value = v;
612   MatScalar       *ap,*aa = a->a,*bap;
613 
614   PetscFunctionBegin;
615   if (roworiented) {
616     stepval = (n-1)*bs;
617   } else {
618     stepval = (m-1)*bs;
619   }
620   for (k=0; k<m; k++) { /* loop over added rows */
621     row  = im[k];
622     if (row < 0) continue;
623 #if defined(PETSC_USE_BOPT_g)
624     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
625 #endif
626     rp   = aj + ai[row];
627     ap   = aa + bs2*ai[row];
628     rmax = imax[row];
629     nrow = ailen[row];
630     low  = 0;
631     for (l=0; l<n; l++) { /* loop over added columns */
632       if (in[l] < 0) continue;
633       col = in[l];
634 #if defined(PETSC_USE_BOPT_g)
635       if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1);
636 #endif
637       if (col < row) continue; /* ignore lower triangular block */
638       if (roworiented) {
639         value = v + k*(stepval+bs)*bs + l*bs;
640       } else {
641         value = v + l*(stepval+bs)*bs + k*bs;
642       }
643       if (!sorted) low = 0; high = nrow;
644       while (high-low > 7) {
645         t = (low+high)/2;
646         if (rp[t] > col) high = t;
647         else             low  = t;
648       }
649       for (i=low; i<high; i++) {
650         if (rp[i] > col) break;
651         if (rp[i] == col) {
652           bap  = ap +  bs2*i;
653           if (roworiented) {
654             if (is == ADD_VALUES) {
655               for (ii=0; ii<bs; ii++,value+=stepval) {
656                 for (jj=ii; jj<bs2; jj+=bs) {
657                   bap[jj] += *value++;
658                 }
659               }
660             } else {
661               for (ii=0; ii<bs; ii++,value+=stepval) {
662                 for (jj=ii; jj<bs2; jj+=bs) {
663                   bap[jj] = *value++;
664                 }
665               }
666             }
667           } else {
668             if (is == ADD_VALUES) {
669               for (ii=0; ii<bs; ii++,value+=stepval) {
670                 for (jj=0; jj<bs; jj++) {
671                   *bap++ += *value++;
672                 }
673               }
674             } else {
675               for (ii=0; ii<bs; ii++,value+=stepval) {
676                 for (jj=0; jj<bs; jj++) {
677                   *bap++  = *value++;
678                 }
679               }
680             }
681           }
682           goto noinsert2;
683         }
684       }
685       if (nonew == 1) goto noinsert2;
686       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
687       if (nrow >= rmax) {
688         /* there is no extra room in row, therefore enlarge */
689         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
690         MatScalar *new_a;
691 
692         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
693 
694         /* malloc new storage space */
695         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
696 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
697         new_j   = (int*)(new_a + bs2*new_nz);
698         new_i   = new_j + new_nz;
699 
700         /* copy over old data into new slots */
701         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
702         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
703         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
704         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
705         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
706         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
707         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
708         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
709         /* free up old matrix storage */
710         ierr = PetscFree(a->a);CHKERRQ(ierr);
711         if (!a->singlemalloc) {
712           ierr = PetscFree(a->i);CHKERRQ(ierr);
713           ierr = PetscFree(a->j);CHKERRQ(ierr);
714         }
715         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
716         a->singlemalloc = PETSC_TRUE;
717 
718         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
719         rmax = imax[row] = imax[row] + CHUNKSIZE;
720         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
721         a->s_maxnz += bs2*CHUNKSIZE;
722         a->reallocs++;
723         a->s_nz++;
724       }
725       N = nrow++ - 1;
726       /* shift up all the later entries in this row */
727       for (ii=N; ii>=i; ii--) {
728         rp[ii+1] = rp[ii];
729         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
730       }
731       if (N >= i) {
732         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
733       }
734       rp[i] = col;
735       bap   = ap +  bs2*i;
736       if (roworiented) {
737         for (ii=0; ii<bs; ii++,value+=stepval) {
738           for (jj=ii; jj<bs2; jj+=bs) {
739             bap[jj] = *value++;
740           }
741         }
742       } else {
743         for (ii=0; ii<bs; ii++,value+=stepval) {
744           for (jj=0; jj<bs; jj++) {
745             *bap++  = *value++;
746           }
747         }
748       }
749       noinsert2:;
750       low = i;
751     }
752     ailen[row] = nrow;
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ"
759 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
760 {
761   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
762   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
763   int        m = A->m,*ip,N,*ailen = a->ilen;
764   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
765   MatScalar  *aa = a->a,*ap;
766 
767   PetscFunctionBegin;
768   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
769 
770   if (m) rmax = ailen[0];
771   for (i=1; i<mbs; i++) {
772     /* move each row back by the amount of empty slots (fshift) before it*/
773     fshift += imax[i-1] - ailen[i-1];
774     rmax   = PetscMax(rmax,ailen[i]);
775     if (fshift) {
776       ip = aj + ai[i]; ap = aa + bs2*ai[i];
777       N = ailen[i];
778       for (j=0; j<N; j++) {
779         ip[j-fshift] = ip[j];
780         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
781       }
782     }
783     ai[i] = ai[i-1] + ailen[i-1];
784   }
785   if (mbs) {
786     fshift += imax[mbs-1] - ailen[mbs-1];
787     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
788   }
789   /* reset ilen and imax for each row */
790   for (i=0; i<mbs; i++) {
791     ailen[i] = imax[i] = ai[i+1] - ai[i];
792   }
793   a->s_nz = ai[mbs];
794 
795   /* diagonals may have moved, reset it */
796   if (a->diag) {
797     ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr);
798   }
799   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
800            m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
801   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n",
802            a->reallocs);
803   PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
804   a->reallocs          = 0;
805   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
806 
807   PetscFunctionReturn(0);
808 }
809 
810 /*
811    This function returns an array of flags which indicate the locations of contiguous
812    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
813    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
814    Assume: sizes should be long enough to hold all the values.
815 */
816 #undef __FUNCT__
817 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks"
818 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
819 {
820   int        i,j,k,row;
821   PetscTruth flg;
822 
823   PetscFunctionBegin;
824   for (i=0,j=0; i<n; j++) {
825     row = idx[i];
826     if (row%bs!=0) { /* Not the begining of a block */
827       sizes[j] = 1;
828       i++;
829     } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
830       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
831       i++;
832     } else { /* Begining of the block, so check if the complete block exists */
833       flg = PETSC_TRUE;
834       for (k=1; k<bs; k++) {
835         if (row+k != idx[i+k]) { /* break in the block */
836           flg = PETSC_FALSE;
837           break;
838         }
839       }
840       if (flg == PETSC_TRUE) { /* No break in the bs */
841         sizes[j] = bs;
842         i+= bs;
843       } else {
844         sizes[j] = 1;
845         i++;
846       }
847     }
848   }
849   *bs_max = j;
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "MatZeroRows_SeqSBAIJ"
855 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
856 {
857   PetscFunctionBegin;
858   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
859 }
860 
861 /* Only add/insert a(i,j) with i<=j (blocks).
862    Any a(i,j) with i>j input by user is ingored.
863 */
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatSetValues_SeqSBAIJ"
867 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
868 {
869   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
870   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
871   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
872   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
873   int         ridx,cidx,bs2=a->bs2,ierr;
874   MatScalar   *ap,value,*aa=a->a,*bap;
875 
876   PetscFunctionBegin;
877 
878   for (k=0; k<m; k++) { /* loop over added rows */
879     row  = im[k];       /* row number */
880     brow = row/bs;      /* block row number */
881     if (row < 0) continue;
882 #if defined(PETSC_USE_BOPT_g)
883     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
884 #endif
885     rp   = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
886     ap   = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
887     rmax = imax[brow];  /* maximum space allocated for this row */
888     nrow = ailen[brow]; /* actual length of this row */
889     low  = 0;
890 
891     for (l=0; l<n; l++) { /* loop over added columns */
892       if (in[l] < 0) continue;
893 #if defined(PETSC_USE_BOPT_g)
894       if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1);
895 #endif
896       col = in[l];
897       bcol = col/bs;              /* block col number */
898 
899       if (brow <= bcol){
900         ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
901         if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
902           /* element value a(k,l) */
903           if (roworiented) {
904             value = v[l + k*n];
905           } else {
906             value = v[k + l*m];
907           }
908 
909           /* move pointer bap to a(k,l) quickly and add/insert value */
910           if (!sorted) low = 0; high = nrow;
911           while (high-low > 7) {
912             t = (low+high)/2;
913             if (rp[t] > bcol) high = t;
914             else              low  = t;
915           }
916           for (i=low; i<high; i++) {
917             /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */
918             if (rp[i] > bcol) break;
919             if (rp[i] == bcol) {
920               bap  = ap +  bs2*i + bs*cidx + ridx;
921               if (is == ADD_VALUES) *bap += value;
922               else                  *bap  = value;
923               /* for diag block, add/insert its symmetric element a(cidx,ridx) */
924               if (brow == bcol && ridx < cidx){
925                 bap  = ap +  bs2*i + bs*ridx + cidx;
926                 if (is == ADD_VALUES) *bap += value;
927                 else                  *bap  = value;
928               }
929               goto noinsert1;
930             }
931           }
932 
933       if (nonew == 1) goto noinsert1;
934       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
935       if (nrow >= rmax) {
936         /* there is no extra room in row, therefore enlarge */
937         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
938         MatScalar *new_a;
939 
940         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
941 
942         /* Malloc new storage space */
943         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
944         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr);
945         new_j = (int*)(new_a + bs2*new_nz);
946         new_i = new_j + new_nz;
947 
948         /* copy over old data into new slots */
949         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
950         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
951         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
952         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
953         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
954         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
955         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
956         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
957         /* free up old matrix storage */
958         ierr = PetscFree(a->a);CHKERRQ(ierr);
959         if (!a->singlemalloc) {
960           ierr = PetscFree(a->i);CHKERRQ(ierr);
961           ierr = PetscFree(a->j);CHKERRQ(ierr);
962         }
963         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
964         a->singlemalloc = PETSC_TRUE;
965 
966         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
967         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
968         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
969         a->s_maxnz += bs2*CHUNKSIZE;
970         a->reallocs++;
971         a->s_nz++;
972       }
973 
974       N = nrow++ - 1;
975       /* shift up all the later entries in this row */
976       for (ii=N; ii>=i; ii--) {
977         rp[ii+1] = rp[ii];
978         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
979       }
980       if (N>=i) {
981         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
982       }
983       rp[i]                      = bcol;
984       ap[bs2*i + bs*cidx + ridx] = value;
985       noinsert1:;
986       low = i;
987       /* } */
988         }
989       } /* end of if .. if..  */
990     }                     /* end of loop over added columns */
991     ailen[brow] = nrow;
992   }                       /* end of loop over added rows */
993 
994   PetscFunctionReturn(0);
995 }
996 
997 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
998 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
999 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int);
1000 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1001 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]);
1002 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
1003 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
1004 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat);
1005 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
1006 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
1007 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
1008 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
1009 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
1010 extern int MatZeroEntries_SeqSBAIJ(Mat);
1011 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
1012 
1013 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
1014 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
1015 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
1016 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
1017 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
1018 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
1019 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
1020 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
1021 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
1022 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
1023 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
1024 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
1025 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
1026 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
1027 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
1028 
1029 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
1030 
1031 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
1032 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
1033 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
1034 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1035 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
1036 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
1037 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
1038 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
1039 
1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
1048 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*);
1049 
1050 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
1051 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
1052 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
1053 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
1054 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
1055 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
1056 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
1057 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
1058 
1059 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
1060 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
1061 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
1062 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
1063 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
1064 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
1065 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
1066 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
1067 
1068 #ifdef HAVE_MatICCFactor
1069 /* modefied from MatILUFactor_SeqSBAIJ, needs further work!  */
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatICCFactor_SeqSBAIJ"
1072 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
1073 {
1074   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1075   Mat         outA;
1076   int         ierr;
1077   PetscTruth  row_identity,col_identity;
1078 
1079   PetscFunctionBegin;
1080   /*
1081   if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1082   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1083   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1084   if (!row_identity || !col_identity) {
1085     SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
1086   }
1087   */
1088 
1089   outA          = inA;
1090   inA->factor   = FACTOR_CHOLESKY;
1091 
1092   if (!a->diag) {
1093     ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr);
1094   }
1095   /*
1096       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1097       for ILU(0) factorization with natural ordering
1098   */
1099   switch (a->bs) {
1100   case 1:
1101     inA->ops->solve            = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1102     inA->ops->solvetranspose   = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1103     inA->ops->solves           = MatSolves_SeqSBAIJ_1;
1104     PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1105   case 2:
1106     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1107     inA->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1108     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1109     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1110     break;
1111   case 3:
1112     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1113     inA->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1114     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1115     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1116     break;
1117   case 4:
1118     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1119     inA->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1120     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1121     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1122     break;
1123   case 5:
1124     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1125     inA->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1126     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1127     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1128     break;
1129   case 6:
1130     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1131     inA->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1132     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1133     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1134     break;
1135   case 7:
1136     inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1137     inA->ops->solvetranspose  = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1138     inA->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1139     PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1140     break;
1141   default:
1142     a->row        = row;
1143     a->icol       = col;
1144     ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1145     ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1146 
1147     /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1148     ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr);
1149     PetscLogObjectParent(inA,a->icol);
1150 
1151     if (!a->solve_work) {
1152       ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1153       PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1154     }
1155   }
1156 
1157   ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr);
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 #endif
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ"
1165 int MatPrintHelp_SeqSBAIJ(Mat A)
1166 {
1167   static PetscTruth called = PETSC_FALSE;
1168   MPI_Comm          comm = A->comm;
1169   int               ierr;
1170 
1171   PetscFunctionBegin;
1172   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1173   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1174   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 EXTERN_C_BEGIN
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ"
1181 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1182 {
1183   Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1184   int         i,nz,n;
1185 
1186   PetscFunctionBegin;
1187   nz = baij->s_maxnz;
1188   n  = mat->n;
1189   for (i=0; i<nz; i++) {
1190     baij->j[i] = indices[i];
1191   }
1192   baij->s_nz = nz;
1193   for (i=0; i<n; i++) {
1194     baij->ilen[i] = baij->imax[i];
1195   }
1196 
1197   PetscFunctionReturn(0);
1198 }
1199 EXTERN_C_END
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices"
1203 /*@
1204     MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1205        in the matrix.
1206 
1207   Input Parameters:
1208 +  mat     - the SeqSBAIJ matrix
1209 -  indices - the column indices
1210 
1211   Level: advanced
1212 
1213   Notes:
1214     This can be called if you have precomputed the nonzero structure of the
1215   matrix and want to provide it to the matrix object to improve the performance
1216   of the MatSetValues() operation.
1217 
1218     You MUST have set the correct numbers of nonzeros per row in the call to
1219   MatCreateSeqSBAIJ().
1220 
1221     MUST be called before any calls to MatSetValues()
1222 
1223 .seealso: MatCreateSeqSBAIJ
1224 @*/
1225 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1226 {
1227   int ierr,(*f)(Mat,int *);
1228 
1229   PetscFunctionBegin;
1230   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1231   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1232   if (f) {
1233     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1234   } else {
1235     SETERRQ(1,"Wrong type of matrix to set column indices");
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ"
1242 int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1243 {
1244   int        ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr =  MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetArray_SeqSBAIJ"
1253 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1254 {
1255   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1256   PetscFunctionBegin;
1257   *array = a->a;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ"
1263 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1264 {
1265   PetscFunctionBegin;
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include "petscblaslapack.h"
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatAXPY_SeqSBAIJ"
1272 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1273 {
1274   Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1275   int          ierr,one=1,i,bs=y->bs,bs2,j;
1276 
1277   PetscFunctionBegin;
1278   if (str == SAME_NONZERO_PATTERN) {
1279     BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1280   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1281     if (y->xtoy && y->XtoY != X) {
1282       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1283       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1284     }
1285     if (!y->xtoy) { /* get xtoy */
1286       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1287       y->XtoY = X;
1288     }
1289     bs2 = bs*bs;
1290     for (i=0; i<x->s_nz; i++) {
1291       j = 0;
1292       while (j < bs2){
1293         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1294         j++;
1295       }
1296     }
1297     PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz));
1298   } else {
1299     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ"
1306 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1307 {
1308   PetscFunctionBegin;
1309   *flg = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ"
1315 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1316 {
1317   PetscFunctionBegin;
1318   *flg = PETSC_TRUE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ"
1324 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1325 {
1326   PetscFunctionBegin;
1327   *flg = PETSC_FALSE;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* -------------------------------------------------------------------*/
1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1333        MatGetRow_SeqSBAIJ,
1334        MatRestoreRow_SeqSBAIJ,
1335        MatMult_SeqSBAIJ_N,
1336 /* 4*/ MatMultAdd_SeqSBAIJ_N,
1337        MatMultTranspose_SeqSBAIJ,
1338        MatMultTransposeAdd_SeqSBAIJ,
1339        MatSolve_SeqSBAIJ_N,
1340        0,
1341        0,
1342 /*10*/ 0,
1343        0,
1344        MatCholeskyFactor_SeqSBAIJ,
1345        MatRelax_SeqSBAIJ,
1346        MatTranspose_SeqSBAIJ,
1347 /*15*/ MatGetInfo_SeqSBAIJ,
1348        MatEqual_SeqSBAIJ,
1349        MatGetDiagonal_SeqSBAIJ,
1350        MatDiagonalScale_SeqSBAIJ,
1351        MatNorm_SeqSBAIJ,
1352 /*20*/ 0,
1353        MatAssemblyEnd_SeqSBAIJ,
1354        0,
1355        MatSetOption_SeqSBAIJ,
1356        MatZeroEntries_SeqSBAIJ,
1357 /*25*/ MatZeroRows_SeqSBAIJ,
1358        0,
1359        0,
1360        MatCholeskyFactorSymbolic_SeqSBAIJ,
1361        MatCholeskyFactorNumeric_SeqSBAIJ_N,
1362 /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1363        0,
1364        MatICCFactorSymbolic_SeqSBAIJ,
1365        MatGetArray_SeqSBAIJ,
1366        MatRestoreArray_SeqSBAIJ,
1367 /*35*/ MatDuplicate_SeqSBAIJ,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*40*/ MatAXPY_SeqSBAIJ,
1373        MatGetSubMatrices_SeqSBAIJ,
1374        MatIncreaseOverlap_SeqSBAIJ,
1375        MatGetValues_SeqSBAIJ,
1376        0,
1377 /*45*/ MatPrintHelp_SeqSBAIJ,
1378        MatScale_SeqSBAIJ,
1379        0,
1380        0,
1381        0,
1382 /*50*/ MatGetBlockSize_SeqSBAIJ,
1383        MatGetRowIJ_SeqSBAIJ,
1384        MatRestoreRowIJ_SeqSBAIJ,
1385        0,
1386        0,
1387 /*55*/ 0,
1388        0,
1389        0,
1390        0,
1391        MatSetValuesBlocked_SeqSBAIJ,
1392 /*60*/ MatGetSubMatrix_SeqSBAIJ,
1393        0,
1394        0,
1395        MatGetPetscMaps_Petsc,
1396        0,
1397 /*65*/ 0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*70*/ MatGetRowMax_SeqSBAIJ,
1403        0,
1404        0,
1405        0,
1406        0,
1407 /*75*/ 0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*80*/ 0,
1413        0,
1414        0,
1415        0,
1416 #if !defined(PETSC_USE_COMPLEX)
1417        MatGetInertia_SeqSBAIJ,
1418 #else
1419        0,
1420 #endif
1421 /*85*/ MatLoad_SeqSBAIJ,
1422        MatIsSymmetric_SeqSBAIJ,
1423        MatIsStructurallySymmetric_SeqSBAIJ,
1424        MatIsHermitian_SeqSBAIJ
1425 };
1426 
1427 EXTERN_C_BEGIN
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatStoreValues_SeqSBAIJ"
1430 int MatStoreValues_SeqSBAIJ(Mat mat)
1431 {
1432   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1433   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   if (aij->nonew != 1) {
1438     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1439   }
1440 
1441   /* allocate space for values if not already there */
1442   if (!aij->saved_values) {
1443     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1444   }
1445 
1446   /* copy values over */
1447   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 EXTERN_C_END
1451 
1452 EXTERN_C_BEGIN
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ"
1455 int MatRetrieveValues_SeqSBAIJ(Mat mat)
1456 {
1457   Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1458   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1459 
1460   PetscFunctionBegin;
1461   if (aij->nonew != 1) {
1462     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1463   }
1464   if (!aij->saved_values) {
1465     SETERRQ(1,"Must call MatStoreValues(A);first");
1466   }
1467 
1468   /* copy values over */
1469   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 EXTERN_C_END
1473 
1474 EXTERN_C_BEGIN
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ"
1477 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz)
1478 {
1479   Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1480   int          i,len,ierr,mbs,bs2;
1481   PetscTruth   flg;
1482   int          s_nz;
1483 
1484   PetscFunctionBegin;
1485   B->preallocated = PETSC_TRUE;
1486   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1487   mbs  = B->m/bs;
1488   bs2  = bs*bs;
1489 
1490   if (mbs*bs != B->m) {
1491     SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1492   }
1493 
1494   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1495   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1496   if (nnz) {
1497     for (i=0; i<mbs; i++) {
1498       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1499       if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1500     }
1501   }
1502 
1503   ierr    = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1504   if (!flg) {
1505     switch (bs) {
1506     case 1:
1507       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1508       B->ops->solve           = MatSolve_SeqSBAIJ_1;
1509       B->ops->solves          = MatSolves_SeqSBAIJ_1;
1510       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_1;
1511       B->ops->mult            = MatMult_SeqSBAIJ_1;
1512       B->ops->multadd         = MatMultAdd_SeqSBAIJ_1;
1513       break;
1514     case 2:
1515       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1516       B->ops->solve           = MatSolve_SeqSBAIJ_2;
1517       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_2;
1518       B->ops->mult            = MatMult_SeqSBAIJ_2;
1519       B->ops->multadd         = MatMultAdd_SeqSBAIJ_2;
1520       break;
1521     case 3:
1522       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1523       B->ops->solve           = MatSolve_SeqSBAIJ_3;
1524       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_3;
1525       B->ops->mult            = MatMult_SeqSBAIJ_3;
1526       B->ops->multadd         = MatMultAdd_SeqSBAIJ_3;
1527       break;
1528     case 4:
1529       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1530       B->ops->solve           = MatSolve_SeqSBAIJ_4;
1531       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_4;
1532       B->ops->mult            = MatMult_SeqSBAIJ_4;
1533       B->ops->multadd         = MatMultAdd_SeqSBAIJ_4;
1534       break;
1535     case 5:
1536       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1537       B->ops->solve           = MatSolve_SeqSBAIJ_5;
1538       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_5;
1539       B->ops->mult            = MatMult_SeqSBAIJ_5;
1540       B->ops->multadd         = MatMultAdd_SeqSBAIJ_5;
1541       break;
1542     case 6:
1543       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1544       B->ops->solve           = MatSolve_SeqSBAIJ_6;
1545       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_6;
1546       B->ops->mult            = MatMult_SeqSBAIJ_6;
1547       B->ops->multadd         = MatMultAdd_SeqSBAIJ_6;
1548       break;
1549     case 7:
1550       B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1551       B->ops->solve           = MatSolve_SeqSBAIJ_7;
1552       B->ops->solvetranspose  = MatSolveTranspose_SeqSBAIJ_7;
1553       B->ops->mult            = MatMult_SeqSBAIJ_7;
1554       B->ops->multadd         = MatMultAdd_SeqSBAIJ_7;
1555       break;
1556     }
1557   }
1558 
1559   b->mbs = mbs;
1560   b->nbs = mbs;
1561   ierr   = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1562   if (!nnz) {
1563     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1564     else if (nz <= 0)        nz = 1;
1565     for (i=0; i<mbs; i++) {
1566       b->imax[i] = nz;
1567     }
1568     nz = nz*mbs; /* total nz */
1569   } else {
1570     nz = 0;
1571     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1572   }
1573   /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1574   s_nz = nz;
1575 
1576   /* allocate the matrix space */
1577   len  = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1578   ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1579   ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1580   b->j = (int*)(b->a + s_nz*bs2);
1581   ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr);
1582   b->i = b->j + s_nz;
1583   b->singlemalloc = PETSC_TRUE;
1584 
1585   /* pointer to beginning of each row */
1586   b->i[0] = 0;
1587   for (i=1; i<mbs+1; i++) {
1588     b->i[i] = b->i[i-1] + b->imax[i-1];
1589   }
1590 
1591   /* b->ilen will count nonzeros in each block row so far. */
1592   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1593   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1594   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1595 
1596   b->bs               = bs;
1597   b->bs2              = bs2;
1598   b->s_nz             = 0;
1599   b->s_maxnz          = s_nz*bs2;
1600 
1601   b->inew             = 0;
1602   b->jnew             = 0;
1603   b->anew             = 0;
1604   b->a2anew           = 0;
1605   b->permute          = PETSC_FALSE;
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 /*MC
1611    MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1612    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1613 
1614    Options Database Keys:
1615 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1616 
1617   Level: beginner
1618 
1619 .seealso: MatCreateSeqSBAIJ
1620 M*/
1621 
1622 EXTERN_C_BEGIN
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatCreate_SeqSBAIJ"
1625 int MatCreate_SeqSBAIJ(Mat B)
1626 {
1627   Mat_SeqSBAIJ *b;
1628   int          ierr,size;
1629 
1630   PetscFunctionBegin;
1631   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1632   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1633   B->m = B->M = PetscMax(B->m,B->M);
1634   B->n = B->N = PetscMax(B->n,B->N);
1635 
1636   ierr    = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr);
1637   B->data = (void*)b;
1638   ierr    = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr);
1639   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1640   B->ops->destroy     = MatDestroy_SeqSBAIJ;
1641   B->ops->view        = MatView_SeqSBAIJ;
1642   B->factor           = 0;
1643   B->lupivotthreshold = 1.0;
1644   B->mapping          = 0;
1645   b->row              = 0;
1646   b->icol             = 0;
1647   b->reallocs         = 0;
1648   b->saved_values     = 0;
1649 
1650   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1651   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1652 
1653   b->sorted           = PETSC_FALSE;
1654   b->roworiented      = PETSC_TRUE;
1655   b->nonew            = 0;
1656   b->diag             = 0;
1657   b->solve_work       = 0;
1658   b->mult_work        = 0;
1659   B->spptr            = 0;
1660   b->keepzeroedrows   = PETSC_FALSE;
1661   b->xtoy             = 0;
1662   b->XtoY             = 0;
1663 
1664   b->inew             = 0;
1665   b->jnew             = 0;
1666   b->anew             = 0;
1667   b->a2anew           = 0;
1668   b->permute          = PETSC_FALSE;
1669 
1670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1671                                      "MatStoreValues_SeqSBAIJ",
1672                                      MatStoreValues_SeqSBAIJ);CHKERRQ(ierr);
1673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1674                                      "MatRetrieveValues_SeqSBAIJ",
1675                                      (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr);
1676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1677                                      "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1678                                      MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr);
1679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1680                                      "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1681                                      MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 EXTERN_C_END
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatSeqSBAIJSetPreallocation"
1688 /*@C
1689    MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1690    compressed row) format.  For good matrix assembly performance the
1691    user should preallocate the matrix storage by setting the parameter nz
1692    (or the array nnz).  By setting these parameters accurately, performance
1693    during matrix assembly can be increased by more than a factor of 50.
1694 
1695    Collective on Mat
1696 
1697    Input Parameters:
1698 +  A - the symmetric matrix
1699 .  bs - size of block
1700 .  nz - number of block nonzeros per block row (same for all rows)
1701 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1702          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1703 
1704    Options Database Keys:
1705 .   -mat_no_unroll - uses code that does not unroll the loops in the
1706                      block calculations (much slower)
1707 .    -mat_block_size - size of the blocks to use
1708 
1709    Level: intermediate
1710 
1711    Notes:
1712    Specify the preallocated storage with either nz or nnz (not both).
1713    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1714    allocation.  For additional details, see the users manual chapter on
1715    matrices.
1716 
1717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1718 @*/
1719 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) {
1720   int ierr,(*f)(Mat,int,int,const int[]);
1721 
1722   PetscFunctionBegin;
1723   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1724   if (f) {
1725     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCreateSeqSBAIJ"
1732 /*@C
1733    MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1734    compressed row) format.  For good matrix assembly performance the
1735    user should preallocate the matrix storage by setting the parameter nz
1736    (or the array nnz).  By setting these parameters accurately, performance
1737    during matrix assembly can be increased by more than a factor of 50.
1738 
1739    Collective on MPI_Comm
1740 
1741    Input Parameters:
1742 +  comm - MPI communicator, set to PETSC_COMM_SELF
1743 .  bs - size of block
1744 .  m - number of rows, or number of columns
1745 .  nz - number of block nonzeros per block row (same for all rows)
1746 -  nnz - array containing the number of block nonzeros in the upper triangular plus
1747          diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1748 
1749    Output Parameter:
1750 .  A - the symmetric matrix
1751 
1752    Options Database Keys:
1753 .   -mat_no_unroll - uses code that does not unroll the loops in the
1754                      block calculations (much slower)
1755 .    -mat_block_size - size of the blocks to use
1756 
1757    Level: intermediate
1758 
1759    Notes:
1760 
1761    Specify the preallocated storage with either nz or nnz (not both).
1762    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1763    allocation.  For additional details, see the users manual chapter on
1764    matrices.
1765 
1766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1767 @*/
1768 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
1769 {
1770   int ierr;
1771 
1772   PetscFunctionBegin;
1773   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1774   ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1775   ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatDuplicate_SeqSBAIJ"
1781 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1782 {
1783   Mat         C;
1784   Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1785   int         i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1786 
1787   PetscFunctionBegin;
1788   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1789 
1790   *B = 0;
1791   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1792   ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr);
1793   c    = (Mat_SeqSBAIJ*)C->data;
1794 
1795   ierr              = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1796   C->preallocated   = PETSC_TRUE;
1797   C->factor         = A->factor;
1798   c->row            = 0;
1799   c->icol           = 0;
1800   c->saved_values   = 0;
1801   c->keepzeroedrows = a->keepzeroedrows;
1802   C->assembled      = PETSC_TRUE;
1803 
1804   c->bs         = a->bs;
1805   c->bs2        = a->bs2;
1806   c->mbs        = a->mbs;
1807   c->nbs        = a->nbs;
1808 
1809   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1810   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1811   for (i=0; i<mbs; i++) {
1812     c->imax[i] = a->imax[i];
1813     c->ilen[i] = a->ilen[i];
1814   }
1815 
1816   /* allocate the matrix space */
1817   c->singlemalloc = PETSC_TRUE;
1818   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1819   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1820   c->j = (int*)(c->a + nz*bs2);
1821   c->i = c->j + nz;
1822   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1823   if (mbs > 0) {
1824     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1825     if (cpvalues == MAT_COPY_VALUES) {
1826       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1827     } else {
1828       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1829     }
1830   }
1831 
1832   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1833   c->sorted      = a->sorted;
1834   c->roworiented = a->roworiented;
1835   c->nonew       = a->nonew;
1836 
1837   if (a->diag) {
1838     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1839     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1840     for (i=0; i<mbs; i++) {
1841       c->diag[i] = a->diag[i];
1842     }
1843   } else c->diag        = 0;
1844   c->s_nz               = a->s_nz;
1845   c->s_maxnz            = a->s_maxnz;
1846   c->solve_work         = 0;
1847   C->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1848   c->mult_work          = 0;
1849   *B = C;
1850   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatLoad_SeqSBAIJ"
1856 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1857 {
1858   Mat_SeqSBAIJ *a;
1859   Mat          B;
1860   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1861   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1862   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1863   int          *masked,nmask,tmp,bs2,ishift;
1864   PetscScalar  *aa;
1865   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1866 
1867   PetscFunctionBegin;
1868   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1869   bs2  = bs*bs;
1870 
1871   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1872   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1873   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1874   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1875   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1876   M = header[1]; N = header[2]; nz = header[3];
1877 
1878   if (header[3] < 0) {
1879     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1880   }
1881 
1882   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1883 
1884   /*
1885      This code adds extra rows to make sure the number of rows is
1886     divisible by the blocksize
1887   */
1888   mbs        = M/bs;
1889   extra_rows = bs - M + bs*(mbs);
1890   if (extra_rows == bs) extra_rows = 0;
1891   else                  mbs++;
1892   if (extra_rows) {
1893     PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1894   }
1895 
1896   /* read in row lengths */
1897   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1898   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1899   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1900 
1901   /* read in column indices */
1902   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1903   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1904   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1905 
1906   /* loop over row lengths determining block row lengths */
1907   ierr     = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr);
1908   ierr     = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1909   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1910   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1911   masked   = mask + mbs;
1912   rowcount = 0; nzcount = 0;
1913   for (i=0; i<mbs; i++) {
1914     nmask = 0;
1915     for (j=0; j<bs; j++) {
1916       kmax = rowlengths[rowcount];
1917       for (k=0; k<kmax; k++) {
1918         tmp = jj[nzcount++]/bs;   /* block col. index */
1919         if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1920       }
1921       rowcount++;
1922     }
1923     s_browlengths[i] += nmask;
1924 
1925     /* zero out the mask elements we set */
1926     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1927   }
1928 
1929   /* create our matrix */
1930   ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr);
1931   ierr = MatSetType(B,type);CHKERRQ(ierr);
1932   ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr);
1933   a = (Mat_SeqSBAIJ*)B->data;
1934 
1935   /* set matrix "i" values */
1936   a->i[0] = 0;
1937   for (i=1; i<= mbs; i++) {
1938     a->i[i]      = a->i[i-1] + s_browlengths[i-1];
1939     a->ilen[i-1] = s_browlengths[i-1];
1940   }
1941   a->s_nz = a->i[mbs];
1942 
1943   /* read in nonzero values */
1944   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1945   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1946   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1947 
1948   /* set "a" and "j" values into matrix */
1949   nzcount = 0; jcount = 0;
1950   for (i=0; i<mbs; i++) {
1951     nzcountb = nzcount;
1952     nmask    = 0;
1953     for (j=0; j<bs; j++) {
1954       kmax = rowlengths[i*bs+j];
1955       for (k=0; k<kmax; k++) {
1956         tmp = jj[nzcount++]/bs; /* block col. index */
1957         if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1958       }
1959     }
1960     /* sort the masked values */
1961     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1962 
1963     /* set "j" values into matrix */
1964     maskcount = 1;
1965     for (j=0; j<nmask; j++) {
1966       a->j[jcount++]  = masked[j];
1967       mask[masked[j]] = maskcount++;
1968     }
1969 
1970     /* set "a" values into matrix */
1971     ishift = bs2*a->i[i];
1972     for (j=0; j<bs; j++) {
1973       kmax = rowlengths[i*bs+j];
1974       for (k=0; k<kmax; k++) {
1975         tmp       = jj[nzcountb]/bs ; /* block col. index */
1976         if (tmp >= i){
1977           block     = mask[tmp] - 1;
1978           point     = jj[nzcountb] - bs*tmp;
1979           idx       = ishift + bs2*block + j + bs*point;
1980           a->a[idx] = aa[nzcountb];
1981         }
1982         nzcountb++;
1983       }
1984     }
1985     /* zero out the mask elements we set */
1986     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1987   }
1988   if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1989 
1990   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1991   ierr = PetscFree(s_browlengths);CHKERRQ(ierr);
1992   ierr = PetscFree(aa);CHKERRQ(ierr);
1993   ierr = PetscFree(jj);CHKERRQ(ierr);
1994   ierr = PetscFree(mask);CHKERRQ(ierr);
1995 
1996   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   ierr = MatView_Private(B);CHKERRQ(ierr);
1999   *A = B;
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatRelax_SeqSBAIJ"
2005 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2006 {
2007   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2008   MatScalar    *aa=a->a,*v,*v1;
2009   PetscScalar  *x,*b,*t,sum,d;
2010   int          m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
2011   int          nz,nz1,*vj,*vj1,i;
2012 
2013   PetscFunctionBegin;
2014   its = its*lits;
2015   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2016 
2017   if (bs > 1)
2018     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2019 
2020   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2021   if (xx != bb) {
2022     ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2023   } else {
2024     b = x;
2025   }
2026 
2027   ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr);
2028 
2029   if (flag & SOR_ZERO_INITIAL_GUESS) {
2030     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2031       for (i=0; i<m; i++)
2032         t[i] = b[i];
2033 
2034       for (i=0; i<m; i++){
2035         d  = *(aa + ai[i]);  /* diag[i] */
2036         v  = aa + ai[i] + 1;
2037         vj = aj + ai[i] + 1;
2038         nz = ai[i+1] - ai[i] - 1;
2039         x[i] = omega*t[i]/d;
2040         while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2041         PetscLogFlops(2*nz-1);
2042       }
2043     }
2044 
2045     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2046       for (i=0; i<m; i++)
2047         t[i] = b[i];
2048 
2049       for (i=0; i<m-1; i++){  /* update rhs */
2050         v  = aa + ai[i] + 1;
2051         vj = aj + ai[i] + 1;
2052         nz = ai[i+1] - ai[i] - 1;
2053         while (nz--) t[*vj++] -= x[i]*(*v++);
2054         PetscLogFlops(2*nz-1);
2055       }
2056       for (i=m-1; i>=0; i--){
2057         d  = *(aa + ai[i]);
2058         v  = aa + ai[i] + 1;
2059         vj = aj + ai[i] + 1;
2060         nz = ai[i+1] - ai[i] - 1;
2061         sum = t[i];
2062         while (nz--) sum -= x[*vj++]*(*v++);
2063         PetscLogFlops(2*nz-1);
2064         x[i] =   (1-omega)*x[i] + omega*sum/d;
2065       }
2066     }
2067     its--;
2068   }
2069 
2070   while (its--) {
2071     /*
2072        forward sweep:
2073        for i=0,...,m-1:
2074          sum[i] = (b[i] - U(i,:)x )/d[i];
2075          x[i]   = (1-omega)x[i] + omega*sum[i];
2076          b      = b - x[i]*U^T(i,:);
2077 
2078     */
2079     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2080       for (i=0; i<m; i++)
2081         t[i] = b[i];
2082 
2083       for (i=0; i<m; i++){
2084         d  = *(aa + ai[i]);  /* diag[i] */
2085         v  = aa + ai[i] + 1; v1=v;
2086         vj = aj + ai[i] + 1; vj1=vj;
2087         nz = ai[i+1] - ai[i] - 1; nz1=nz;
2088         sum = t[i];
2089         while (nz1--) sum -= (*v1++)*x[*vj1++];
2090         x[i] = (1-omega)*x[i] + omega*sum/d;
2091         while (nz--) t[*vj++] -= x[i]*(*v++);
2092         PetscLogFlops(4*nz-2);
2093       }
2094     }
2095 
2096   if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2097       /*
2098        backward sweep:
2099        b = b - x[i]*U^T(i,:), i=0,...,n-2
2100        for i=m-1,...,0:
2101          sum[i] = (b[i] - U(i,:)x )/d[i];
2102          x[i]   = (1-omega)x[i] + omega*sum[i];
2103       */
2104       for (i=0; i<m; i++)
2105         t[i] = b[i];
2106 
2107       for (i=0; i<m-1; i++){  /* update rhs */
2108         v  = aa + ai[i] + 1;
2109         vj = aj + ai[i] + 1;
2110         nz = ai[i+1] - ai[i] - 1;
2111         while (nz--) t[*vj++] -= x[i]*(*v++);
2112         PetscLogFlops(2*nz-1);
2113       }
2114       for (i=m-1; i>=0; i--){
2115         d  = *(aa + ai[i]);
2116         v  = aa + ai[i] + 1;
2117         vj = aj + ai[i] + 1;
2118         nz = ai[i+1] - ai[i] - 1;
2119         sum = t[i];
2120         while (nz--) sum -= x[*vj++]*(*v++);
2121         PetscLogFlops(2*nz-1);
2122         x[i] =   (1-omega)*x[i] + omega*sum/d;
2123       }
2124     }
2125   }
2126 
2127   ierr = PetscFree(t); CHKERRQ(ierr);
2128   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2129   if (bb != xx) {
2130     ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2131   }
2132 
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 
2137 
2138 
2139 
2140 
2141