xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1 
2 #ifdef PETSC_RCS_HEADER
3 static char vcid[] = "$Id: baij.c,v 1.113 1997/10/01 22:39:40 bsmith Exp bsmith $";
4 #endif
5 
6 /*
7     Defines the basic matrix operations for the BAIJ (compressed row)
8   matrix storage format.
9 */
10 
11 #include "pinclude/pviewer.h"
12 #include "sys.h"
13 #include "src/mat/impls/baij/seq/baij.h"
14 #include "src/vec/vecimpl.h"
15 #include "src/inline/spops.h"
16 #include "petsc.h"
17 
18 
19 /*
20      Adds diagonal pointers to sparse matrix structure.
21 */
22 
23 #undef __FUNC__
24 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
25 int MatMarkDiag_SeqBAIJ(Mat A)
26 {
27   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
28   int         i,j, *diag, m = a->mbs;
29 
30   PetscFunctionBegin;
31   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
32   PLogObjectMemory(A,(m+1)*sizeof(int));
33   for ( i=0; i<m; i++ ) {
34     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
35       if (a->j[j] == i) {
36         diag[i] = j;
37         break;
38       }
39     }
40   }
41   a->diag = diag;
42   PetscFunctionReturn(0);
43 }
44 
45 
46 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
47 
48 #undef __FUNC__
49 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
50 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
51                             PetscTruth *done)
52 {
53   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
54   int         ierr, n = a->mbs,i;
55 
56   PetscFunctionBegin;
57   *nn = n;
58   if (!ia) PetscFunctionReturn(0);
59   if (symmetric) {
60     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
61   } else if (oshift == 1) {
62     /* temporarily add 1 to i and j indices */
63     int nz = a->i[n] + 1;
64     for ( i=0; i<nz; i++ ) a->j[i]++;
65     for ( i=0; i<n+1; i++ ) a->i[i]++;
66     *ia = a->i; *ja = a->j;
67   } else {
68     *ia = a->i; *ja = a->j;
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNC__
75 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
76 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
77                                 PetscTruth *done)
78 {
79   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80   int         i,n = a->mbs;
81 
82   PetscFunctionBegin;
83   if (!ia) PetscFunctionReturn(0);
84   if (symmetric) {
85     PetscFree(*ia);
86     PetscFree(*ja);
87   } else if (oshift == 1) {
88     int nz = a->i[n];
89     for ( i=0; i<nz; i++ ) a->j[i]--;
90     for ( i=0; i<n+1; i++ ) a->i[i]--;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 
96 #undef __FUNC__
97 #define __FUNC__ "MatView_SeqBAIJ_Binary"
98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
99 {
100   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
101   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102   Scalar      *aa;
103 
104   PetscFunctionBegin;
105   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
106   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
107   col_lens[0] = MAT_COOKIE;
108 
109   col_lens[1] = a->m;
110   col_lens[2] = a->n;
111   col_lens[3] = a->nz*bs2;
112 
113   /* store lengths of each row and write (including header) to file */
114   count = 0;
115   for ( i=0; i<a->mbs; i++ ) {
116     for ( j=0; j<bs; j++ ) {
117       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118     }
119   }
120   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
121   PetscFree(col_lens);
122 
123   /* store column indices (zero start index) */
124   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
125   count = 0;
126   for ( i=0; i<a->mbs; i++ ) {
127     for ( j=0; j<bs; j++ ) {
128       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129         for ( l=0; l<bs; l++ ) {
130           jj[count++] = bs*a->j[k] + l;
131         }
132       }
133     }
134   }
135   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
136   PetscFree(jj);
137 
138   /* store nonzero values */
139   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140   count = 0;
141   for ( i=0; i<a->mbs; i++ ) {
142     for ( j=0; j<bs; j++ ) {
143       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144         for ( l=0; l<bs; l++ ) {
145           aa[count++] = a->a[bs2*k + l*bs + j];
146         }
147       }
148     }
149   }
150   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
151   PetscFree(aa);
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNC__
156 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
157 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
158 {
159   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
160   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
161   FILE        *fd;
162   char        *outputname;
163 
164   PetscFunctionBegin;
165   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
166   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
167   ierr = ViewerGetFormat(viewer,&format);
168   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
169     fprintf(fd,"  block size is %d\n",bs);
170   }
171   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
172     SETERRQ(1,0,"Matlab format not supported");
173   }
174   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
175     for ( i=0; i<a->mbs; i++ ) {
176       for ( j=0; j<bs; j++ ) {
177         fprintf(fd,"row %d:",i*bs+j);
178         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
179           for ( l=0; l<bs; l++ ) {
180 #if defined(USE_PETSC_COMPLEX)
181           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
182             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
183               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
184           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
185             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
186               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
187           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
188             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
189 #else
190           if (a->a[bs2*k + l*bs + j] != 0.0)
191             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
192 #endif
193           }
194         }
195         fprintf(fd,"\n");
196       }
197     }
198   }
199   else {
200     for ( i=0; i<a->mbs; i++ ) {
201       for ( j=0; j<bs; j++ ) {
202         fprintf(fd,"row %d:",i*bs+j);
203         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
204           for ( l=0; l<bs; l++ ) {
205 #if defined(USE_PETSC_COMPLEX)
206           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
207             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
208               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
209           }
210           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
211             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
212               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
213           }
214           else {
215             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
216           }
217 #else
218             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
219 #endif
220           }
221         }
222         fprintf(fd,"\n");
223       }
224     }
225   }
226   fflush(fd);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNC__
231 #define __FUNC__ "MatView_SeqBAIJ_Draw"
232 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
233 {
234   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
235   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
236   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
237   Scalar       *aa;
238   Draw         draw;
239   DrawButton   button;
240   PetscTruth   isnull;
241 
242   PetscFunctionBegin;
243   ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr);
244   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
245 
246   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
247   xr += w;    yr += h;  xl = -w;     yl = -h;
248   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
249   /* loop over matrix elements drawing boxes */
250   color = DRAW_BLUE;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) >=  0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264   color = DRAW_CYAN;
265   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
266     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
267       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
268       x_l = a->j[j]*bs; x_r = x_l + 1.0;
269       aa = a->a + j*bs2;
270       for ( k=0; k<bs; k++ ) {
271         for ( l=0; l<bs; l++ ) {
272           if (PetscReal(*aa++) != 0.) continue;
273           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
274         }
275       }
276     }
277   }
278 
279   color = DRAW_RED;
280   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
281     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
282       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
283       x_l = a->j[j]*bs; x_r = x_l + 1.0;
284       aa = a->a + j*bs2;
285       for ( k=0; k<bs; k++ ) {
286         for ( l=0; l<bs; l++ ) {
287           if (PetscReal(*aa++) <= 0.) continue;
288           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
289         }
290       }
291     }
292   }
293 
294   DrawFlush(draw);
295   DrawGetPause(draw,&pause);
296   if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);}
297 
298   /* allow the matrix to zoom or shrink */
299   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
300   while (button != BUTTON_RIGHT) {
301     DrawClear(draw);
302     if (button == BUTTON_LEFT) scale = .5;
303     else if (button == BUTTON_CENTER) scale = 2.;
304     xl = scale*(xl + w - xc) + xc - w*scale;
305     xr = scale*(xr - w - xc) + xc + w*scale;
306     yl = scale*(yl + h - yc) + yc - h*scale;
307     yr = scale*(yr - h - yc) + yc + h*scale;
308     w *= scale; h *= scale;
309     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
310     color = DRAW_BLUE;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318             if (PetscReal(*aa++) >=  0.) continue;
319             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324     color = DRAW_CYAN;
325     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
326       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
327         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
328         x_l = a->j[j]*bs; x_r = x_l + 1.0;
329         aa = a->a + j*bs2;
330         for ( k=0; k<bs; k++ ) {
331           for ( l=0; l<bs; l++ ) {
332           if (PetscReal(*aa++) != 0.) continue;
333           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
334           }
335         }
336       }
337     }
338 
339     color = DRAW_RED;
340     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
341       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
342         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
343         x_l = a->j[j]*bs; x_r = x_l + 1.0;
344         aa = a->a + j*bs2;
345         for ( k=0; k<bs; k++ ) {
346           for ( l=0; l<bs; l++ ) {
347             if (PetscReal(*aa++) <= 0.) continue;
348             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
349           }
350         }
351       }
352     }
353     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNC__
359 #define __FUNC__ "MatView_SeqBAIJ"
360 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
361 {
362   Mat         A = (Mat) obj;
363   ViewerType  vtype;
364   int         ierr;
365 
366   PetscFunctionBegin;
367   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
368   if (vtype == MATLAB_VIEWER) {
369     SETERRQ(1,0,"Matlab viewer not supported");
370   } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
371     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
372   } else if (vtype == BINARY_FILE_VIEWER) {
373     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
374   } else if (vtype == DRAW_VIEWER) {
375     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define CHUNKSIZE  10
381 
382 #undef __FUNC__
383 #define __FUNC__ "MatSetValues_SeqBAIJ"
384 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
385 {
386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
387   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
388   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
389   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
390   int          ridx,cidx,bs2=a->bs2;
391   Scalar      *ap,value,*aa=a->a,*bap;
392 
393   PetscFunctionBegin;
394   for ( k=0; k<m; k++ ) { /* loop over added rows */
395     row  = im[k]; brow = row/bs;
396 #if defined(USE_PETSC_BOPT_g)
397     if (row < 0) SETERRQ(1,0,"Negative row");
398     if (row >= a->m) SETERRQ(1,0,"Row too large");
399 #endif
400     rp   = aj + ai[brow];
401     ap   = aa + bs2*ai[brow];
402     rmax = imax[brow];
403     nrow = ailen[brow];
404     low  = 0;
405     for ( l=0; l<n; l++ ) { /* loop over added columns */
406 #if defined(USE_PETSC_BOPT_g)
407       if (in[l] < 0) SETERRQ(1,0,"Negative column");
408       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
409 #endif
410       col = in[l]; bcol = col/bs;
411       ridx = row % bs; cidx = col % bs;
412       if (roworiented) {
413         value = *v++;
414       } else {
415         value = v[k + l*m];
416       }
417       if (!sorted) low = 0; high = nrow;
418       while (high-low > 7) {
419         t = (low+high)/2;
420         if (rp[t] > bcol) high = t;
421         else              low  = t;
422       }
423       for ( i=low; i<high; i++ ) {
424         if (rp[i] > bcol) break;
425         if (rp[i] == bcol) {
426           bap  = ap +  bs2*i + bs*cidx + ridx;
427           if (is == ADD_VALUES) *bap += value;
428           else                  *bap  = value;
429           goto noinsert1;
430         }
431       }
432       if (nonew == 1) goto noinsert1;
433       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
434       if (nrow >= rmax) {
435         /* there is no extra room in row, therefore enlarge */
436         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
437         Scalar *new_a;
438 
439         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
440 
441         /* Malloc new storage space */
442         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
443         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
444         new_j   = (int *) (new_a + bs2*new_nz);
445         new_i   = new_j + new_nz;
446 
447         /* copy over old data into new slots */
448         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
449         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
450         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
451         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
452         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
453                                                            len*sizeof(int));
454         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
455         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
456         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
457                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
458         /* free up old matrix storage */
459         PetscFree(a->a);
460         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
461         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
462         a->singlemalloc = 1;
463 
464         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
465         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
466         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
467         a->maxnz += bs2*CHUNKSIZE;
468         a->reallocs++;
469         a->nz++;
470       }
471       N = nrow++ - 1;
472       /* shift up all the later entries in this row */
473       for ( ii=N; ii>=i; ii-- ) {
474         rp[ii+1] = rp[ii];
475         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
476       }
477       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
478       rp[i]                      = bcol;
479       ap[bs2*i + bs*cidx + ridx] = value;
480       noinsert1:;
481       low = i;
482     }
483     ailen[brow] = nrow;
484   }
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNC__
489 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
490 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
491 {
492   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
493   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
494   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
495   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
496   Scalar      *ap,*value=v,*aa=a->a,*bap;
497 
498   PetscFunctionBegin;
499   if (roworiented) {
500     stepval = (n-1)*bs;
501   } else {
502     stepval = (m-1)*bs;
503   }
504   for ( k=0; k<m; k++ ) { /* loop over added rows */
505     row  = im[k];
506 #if defined(USE_PETSC_BOPT_g)
507     if (row < 0) SETERRQ(1,0,"Negative row");
508     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
509 #endif
510     rp   = aj + ai[row];
511     ap   = aa + bs2*ai[row];
512     rmax = imax[row];
513     nrow = ailen[row];
514     low  = 0;
515     for ( l=0; l<n; l++ ) { /* loop over added columns */
516 #if defined(USE_PETSC_BOPT_g)
517       if (in[l] < 0) SETERRQ(1,0,"Negative column");
518       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
519 #endif
520       col = in[l];
521       if (roworiented) {
522         value = v + k*(stepval+bs)*bs + l*bs;
523       } else {
524         value = v + l*(stepval+bs)*bs + k*bs;
525       }
526       if (!sorted) low = 0; high = nrow;
527       while (high-low > 7) {
528         t = (low+high)/2;
529         if (rp[t] > col) high = t;
530         else             low  = t;
531       }
532       for ( i=low; i<high; i++ ) {
533         if (rp[i] > col) break;
534         if (rp[i] == col) {
535           bap  = ap +  bs2*i;
536           if (roworiented) {
537             if (is == ADD_VALUES) {
538               for ( ii=0; ii<bs; ii++,value+=stepval ) {
539                 for (jj=ii; jj<bs2; jj+=bs ) {
540                   bap[jj] += *value++;
541                 }
542               }
543             } else {
544               for ( ii=0; ii<bs; ii++,value+=stepval ) {
545                 for (jj=ii; jj<bs2; jj+=bs ) {
546                   bap[jj] = *value++;
547                 }
548               }
549             }
550           } else {
551             if (is == ADD_VALUES) {
552               for ( ii=0; ii<bs; ii++,value+=stepval ) {
553                 for (jj=0; jj<bs; jj++ ) {
554                   *bap++ += *value++;
555                 }
556               }
557             } else {
558               for ( ii=0; ii<bs; ii++,value+=stepval ) {
559                 for (jj=0; jj<bs; jj++ ) {
560                   *bap++  = *value++;
561                 }
562               }
563             }
564           }
565           goto noinsert2;
566         }
567       }
568       if (nonew == 1) goto noinsert2;
569       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
570       if (nrow >= rmax) {
571         /* there is no extra room in row, therefore enlarge */
572         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
573         Scalar *new_a;
574 
575         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
576 
577         /* malloc new storage space */
578         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
579         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
580         new_j   = (int *) (new_a + bs2*new_nz);
581         new_i   = new_j + new_nz;
582 
583         /* copy over old data into new slots */
584         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
585         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
586         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
587         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
588         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
589         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
590         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
591         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
592         /* free up old matrix storage */
593         PetscFree(a->a);
594         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
595         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
596         a->singlemalloc = 1;
597 
598         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
599         rmax = imax[row] = imax[row] + CHUNKSIZE;
600         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
601         a->maxnz += bs2*CHUNKSIZE;
602         a->reallocs++;
603         a->nz++;
604       }
605       N = nrow++ - 1;
606       /* shift up all the later entries in this row */
607       for ( ii=N; ii>=i; ii-- ) {
608         rp[ii+1] = rp[ii];
609         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
610       }
611       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
612       rp[i] = col;
613       bap   = ap +  bs2*i;
614       if (roworiented) {
615         for ( ii=0; ii<bs; ii++,value+=stepval) {
616           for (jj=ii; jj<bs2; jj+=bs ) {
617             bap[jj] = *value++;
618           }
619         }
620       } else {
621         for ( ii=0; ii<bs; ii++,value+=stepval ) {
622           for (jj=0; jj<bs; jj++ ) {
623             *bap++  = *value++;
624           }
625         }
626       }
627       noinsert2:;
628       low = i;
629     }
630     ailen[row] = nrow;
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNC__
636 #define __FUNC__ "MatGetSize_SeqBAIJ"
637 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
638 {
639   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
640 
641   PetscFunctionBegin;
642   if (m) *m = a->m;
643   if (n) *n = a->n;
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNC__
648 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
649 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
650 {
651   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
652 
653   PetscFunctionBegin;
654   *m = 0; *n = a->m;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatGetRow_SeqBAIJ"
660 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
661 {
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
663   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
664   Scalar      *aa,*v_i,*aa_i;
665 
666   PetscFunctionBegin;
667   bs  = a->bs;
668   ai  = a->i;
669   aj  = a->j;
670   aa  = a->a;
671   bs2 = a->bs2;
672 
673   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
674 
675   bn  = row/bs;   /* Block number */
676   bp  = row % bs; /* Block Position */
677   M   = ai[bn+1] - ai[bn];
678   *nz = bs*M;
679 
680   if (v) {
681     *v = 0;
682     if (*nz) {
683       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
684       for ( i=0; i<M; i++ ) { /* for each block in the block row */
685         v_i  = *v + i*bs;
686         aa_i = aa + bs2*(ai[bn] + i);
687         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
688       }
689     }
690   }
691 
692   if (idx) {
693     *idx = 0;
694     if (*nz) {
695       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
696       for ( i=0; i<M; i++ ) { /* for each block in the block row */
697         idx_i = *idx + i*bs;
698         itmp  = bs*aj[ai[bn] + i];
699         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
700       }
701     }
702   }
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNC__
707 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
708 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
709 {
710   PetscFunctionBegin;
711   if (idx) {if (*idx) PetscFree(*idx);}
712   if (v)   {if (*v)   PetscFree(*v);}
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNC__
717 #define __FUNC__ "MatTranspose_SeqBAIJ"
718 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
719 {
720   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
721   Mat         C;
722   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
723   int         *rows,*cols,bs2=a->bs2;
724   Scalar      *array=a->a;
725 
726   PetscFunctionBegin;
727   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(1,0,"Square matrix only for in-place");
728   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
729   PetscMemzero(col,(1+nbs)*sizeof(int));
730 
731   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
732   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
733   PetscFree(col);
734   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
735   cols = rows + bs;
736   for ( i=0; i<mbs; i++ ) {
737     cols[0] = i*bs;
738     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
739     len = ai[i+1] - ai[i];
740     for ( j=0; j<len; j++ ) {
741       rows[0] = (*aj++)*bs;
742       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
743       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
744       array += bs2;
745     }
746   }
747   PetscFree(rows);
748 
749   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
751 
752   if (B != PETSC_NULL) {
753     *B = C;
754   } else {
755     /* This isn't really an in-place transpose */
756     PetscFree(a->a);
757     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
758     if (a->diag) PetscFree(a->diag);
759     if (a->ilen) PetscFree(a->ilen);
760     if (a->imax) PetscFree(a->imax);
761     if (a->solve_work) PetscFree(a->solve_work);
762     PetscFree(a);
763     PetscMemcpy(A,C,sizeof(struct _p_Mat));
764     PetscHeaderDestroy(C);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 
770 #undef __FUNC__
771 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
772 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
773 {
774   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
775   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
776   int        m = a->m,*ip, N, *ailen = a->ilen;
777   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
778   Scalar     *aa = a->a, *ap;
779 
780   PetscFunctionBegin;
781   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
782 
783   if (m) rmax = ailen[0];
784   for ( i=1; i<mbs; i++ ) {
785     /* move each row back by the amount of empty slots (fshift) before it*/
786     fshift += imax[i-1] - ailen[i-1];
787     rmax   = PetscMax(rmax,ailen[i]);
788     if (fshift) {
789       ip = aj + ai[i]; ap = aa + bs2*ai[i];
790       N = ailen[i];
791       for ( j=0; j<N; j++ ) {
792         ip[j-fshift] = ip[j];
793         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
794       }
795     }
796     ai[i] = ai[i-1] + ailen[i-1];
797   }
798   if (mbs) {
799     fshift += imax[mbs-1] - ailen[mbs-1];
800     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
801   }
802   /* reset ilen and imax for each row */
803   for ( i=0; i<mbs; i++ ) {
804     ailen[i] = imax[i] = ai[i+1] - ai[i];
805   }
806   a->nz = ai[mbs];
807 
808   /* diagonals may have moved, so kill the diagonal pointers */
809   if (fshift && a->diag) {
810     PetscFree(a->diag);
811     PLogObjectMemory(A,-(m+1)*sizeof(int));
812     a->diag = 0;
813   }
814   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
815            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
816   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
817            a->reallocs);
818   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
819   a->reallocs          = 0;
820   A->info.nz_unneeded  = (double)fshift*bs2;
821 
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNC__
826 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
827 int MatZeroEntries_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
830 
831   PetscFunctionBegin;
832   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNC__
837 #define __FUNC__ "MatDestroy_SeqBAIJ"
838 int MatDestroy_SeqBAIJ(PetscObject obj)
839 {
840   Mat         A  = (Mat) obj;
841   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
842 
843 #if defined(USE_PETSC_LOG)
844   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
845 #endif
846   PetscFree(a->a);
847   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
848   if (a->diag) PetscFree(a->diag);
849   if (a->ilen) PetscFree(a->ilen);
850   if (a->imax) PetscFree(a->imax);
851   if (a->solve_work) PetscFree(a->solve_work);
852   if (a->mult_work) PetscFree(a->mult_work);
853   PetscFree(a);
854   PLogObjectDestroy(A);
855   PetscHeaderDestroy(A);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNC__
860 #define __FUNC__ "MatSetOption_SeqBAIJ"
861 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
862 {
863   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
864 
865   PetscFunctionBegin;
866   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
867   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
868   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
869   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
870   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
871   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
872   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
873   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
874   else if (op == MAT_ROWS_SORTED ||
875            op == MAT_ROWS_UNSORTED ||
876            op == MAT_SYMMETRIC ||
877            op == MAT_STRUCTURALLY_SYMMETRIC ||
878            op == MAT_YES_NEW_DIAGONALS ||
879            op == MAT_IGNORE_OFF_PROC_ENTRIES) {
880     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
881   } else if (op == MAT_NO_NEW_DIAGONALS) {
882     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
883   } else {
884     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 
890 /* -------------------------------------------------------*/
891 /* Should check that shapes of vectors and matrices match */
892 /* -------------------------------------------------------*/
893 #include "pinclude/plapack.h"
894 
895 #undef __FUNC__
896 #define __FUNC__ "MatMult_SeqBAIJ_1"
897 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
898 {
899   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
900   register Scalar *x,*z,*v,sum;
901   int             mbs=a->mbs,i,*idx,*ii,n;
902 
903   PetscFunctionBegin;
904   VecGetArray_Fast(xx,x);
905   VecGetArray_Fast(zz,z);
906 
907   idx   = a->j;
908   v     = a->a;
909   ii    = a->i;
910 
911   for ( i=0; i<mbs; i++ ) {
912     n    = ii[1] - ii[0]; ii++;
913     sum  = 0.0;
914     while (n--) sum += *v++ * x[*idx++];
915     z[i] = sum;
916   }
917   VecRestoreArray_Fast(xx,x);
918   VecRestoreArray_Fast(zz,z);
919   PLogFlops(2*a->nz - a->m);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNC__
924 #define __FUNC__ "MatMult_SeqBAIJ_2"
925 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
926 {
927   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
928   register Scalar *x,*z,*v,*xb,sum1,sum2;
929   register Scalar x1,x2;
930   int             mbs=a->mbs,i,*idx,*ii,j,n;
931 
932   PetscFunctionBegin;
933   VecGetArray_Fast(xx,x);
934   VecGetArray_Fast(zz,z);
935 
936   idx   = a->j;
937   v     = a->a;
938   ii    = a->i;
939 
940   for ( i=0; i<mbs; i++ ) {
941     n  = ii[1] - ii[0]; ii++;
942     sum1 = 0.0; sum2 = 0.0;
943     for ( j=0; j<n; j++ ) {
944       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
945       sum1 += v[0]*x1 + v[2]*x2;
946       sum2 += v[1]*x1 + v[3]*x2;
947       v += 4;
948     }
949     z[0] = sum1; z[1] = sum2;
950     z += 2;
951   }
952   VecRestoreArray_Fast(xx,x);
953   VecRestoreArray_Fast(zz,z);
954   PLogFlops(4*a->nz - a->m);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNC__
959 #define __FUNC__ "MatMult_SeqBAIJ_3"
960 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
961 {
962   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
963   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
964   int             mbs=a->mbs,i,*idx,*ii,j,n;
965 
966   PetscFunctionBegin;
967   VecGetArray_Fast(xx,x);
968   VecGetArray_Fast(zz,z);
969 
970   idx   = a->j;
971   v     = a->a;
972   ii    = a->i;
973 
974   for ( i=0; i<mbs; i++ ) {
975     n  = ii[1] - ii[0]; ii++;
976     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
977     for ( j=0; j<n; j++ ) {
978       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
979       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
980       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
981       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
982       v += 9;
983     }
984     z[0] = sum1; z[1] = sum2; z[2] = sum3;
985     z += 3;
986   }
987   VecRestoreArray_Fast(xx,x);
988   VecRestoreArray_Fast(zz,z);
989   PLogFlops(18*a->nz - a->m);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNC__
994 #define __FUNC__ "MatMult_SeqBAIJ_4"
995 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
996 {
997   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
998   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
999   int             mbs=a->mbs,i,*idx,*ii,j,n;
1000 
1001   PetscFunctionBegin;
1002   VecGetArray_Fast(xx,x);
1003   VecGetArray_Fast(zz,z);
1004 
1005   idx   = a->j;
1006   v     = a->a;
1007   ii    = a->i;
1008 
1009   for ( i=0; i<mbs; i++ ) {
1010     n  = ii[1] - ii[0]; ii++;
1011     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
1012     for ( j=0; j<n; j++ ) {
1013       xb = x + 4*(*idx++);
1014       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1015       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1016       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1017       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1018       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1019       v += 16;
1020     }
1021     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1022     z += 4;
1023   }
1024   VecRestoreArray_Fast(xx,x);
1025   VecRestoreArray_Fast(zz,z);
1026   PLogFlops(32*a->nz - a->m);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNC__
1031 #define __FUNC__ "MatMult_SeqBAIJ_5"
1032 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
1033 {
1034   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1035   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1036   int             mbs=a->mbs,i,*idx,*ii,j,n;
1037 
1038   VecGetArray_Fast(xx,x);
1039   VecGetArray_Fast(zz,z);
1040 
1041   idx   = a->j;
1042   v     = a->a;
1043   ii    = a->i;
1044 
1045   for ( i=0; i<mbs; i++ ) {
1046     n  = ii[1] - ii[0]; ii++;
1047     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1048     for ( j=0; j<n; j++ ) {
1049       xb = x + 5*(*idx++);
1050       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1051       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1052       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1053       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1054       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1055       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1056       v += 25;
1057     }
1058     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1059     z += 5;
1060   }
1061   VecRestoreArray_Fast(xx,x);
1062   VecRestoreArray_Fast(zz,z);
1063   PLogFlops(50*a->nz - a->m);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNC__
1068 #define __FUNC__ "MatMult_SeqBAIJ_7"
1069 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1070 {
1071   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1072   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1073   register Scalar x1,x2,x3,x4,x5,x6,x7;
1074   int             mbs=a->mbs,i,*idx,*ii,j,n;
1075 
1076   VecGetArray_Fast(xx,x);
1077   VecGetArray_Fast(zz,z);
1078 
1079   idx   = a->j;
1080   v     = a->a;
1081   ii    = a->i;
1082 
1083   for ( i=0; i<mbs; i++ ) {
1084     n  = ii[1] - ii[0]; ii++;
1085     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1086     for ( j=0; j<n; j++ ) {
1087       xb = x + 7*(*idx++);
1088       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1089       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1090       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1091       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1092       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1093       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1094       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1095       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1096       v += 49;
1097     }
1098     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1099     z += 7;
1100   }
1101 
1102   VecRestoreArray_Fast(xx,x);
1103   VecRestoreArray_Fast(zz,z);
1104   PLogFlops(98*a->nz - a->m);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNC__
1109 #define __FUNC__ "MatMult_SeqBAIJ_N"
1110 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1111 {
1112   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1113   register Scalar *x,*z,*v,*xb;
1114   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1115   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1116   int             _One = 1,ncols,k;
1117 
1118   PetscFunctionBegin;
1119   VecGetArray_Fast(xx,x);
1120   VecGetArray_Fast(zz,z);
1121 
1122   idx   = a->j;
1123   v     = a->a;
1124   ii    = a->i;
1125 
1126 
1127   if (!a->mult_work) {
1128     k = PetscMax(a->m,a->n);
1129     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1130   }
1131   work = a->mult_work;
1132   for ( i=0; i<mbs; i++ ) {
1133     n     = ii[1] - ii[0]; ii++;
1134     ncols = n*bs;
1135     workt = work;
1136     for ( j=0; j<n; j++ ) {
1137       xb = x + bs*(*idx++);
1138       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1139       workt += bs;
1140     }
1141     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1142     v += n*bs2;
1143     z += bs;
1144   }
1145   VecRestoreArray_Fast(xx,x);
1146   VecRestoreArray_Fast(zz,z);
1147   PLogFlops(2*a->nz*bs2 - a->m);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNC__
1152 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1153 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1154 {
1155   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1156   register Scalar *x,*y,*z,*v,sum;
1157   int             mbs=a->mbs,i,*idx,*ii,n;
1158 
1159   PetscFunctionBegin;
1160   VecGetArray_Fast(xx,x);
1161   VecGetArray_Fast(yy,y);
1162   VecGetArray_Fast(zz,z);
1163 
1164   idx   = a->j;
1165   v     = a->a;
1166   ii    = a->i;
1167 
1168   for ( i=0; i<mbs; i++ ) {
1169     n    = ii[1] - ii[0]; ii++;
1170     sum  = y[i];
1171     while (n--) sum += *v++ * x[*idx++];
1172     z[i] = sum;
1173   }
1174   VecRestoreArray_Fast(xx,x);
1175   VecRestoreArray_Fast(yy,y);
1176   VecRestoreArray_Fast(zz,z);
1177   PLogFlops(2*a->nz);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNC__
1182 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1183 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1184 {
1185   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1186   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1187   register Scalar x1,x2;
1188   int             mbs=a->mbs,i,*idx,*ii,j,n;
1189 
1190   PetscFunctionBegin;
1191   VecGetArray_Fast(xx,x);
1192   VecGetArray_Fast(yy,y);
1193   VecGetArray_Fast(zz,z);
1194 
1195   idx   = a->j;
1196   v     = a->a;
1197   ii    = a->i;
1198 
1199   for ( i=0; i<mbs; i++ ) {
1200     n  = ii[1] - ii[0]; ii++;
1201     sum1 = y[0]; sum2 = y[1];
1202     for ( j=0; j<n; j++ ) {
1203       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1204       sum1 += v[0]*x1 + v[2]*x2;
1205       sum2 += v[1]*x1 + v[3]*x2;
1206       v += 4;
1207     }
1208     z[0] = sum1; z[1] = sum2;
1209     z += 2; y += 2;
1210   }
1211   VecRestoreArray_Fast(xx,x);
1212   VecRestoreArray_Fast(yy,y);
1213   VecRestoreArray_Fast(zz,z);
1214   PLogFlops(4*a->nz);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNC__
1219 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1220 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1223   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1224   int             mbs=a->mbs,i,*idx,*ii,j,n;
1225 
1226   PetscFunctionBegin;
1227   VecGetArray_Fast(xx,x);
1228   VecGetArray_Fast(yy,y);
1229   VecGetArray_Fast(zz,z);
1230 
1231   idx   = a->j;
1232   v     = a->a;
1233   ii    = a->i;
1234 
1235   for ( i=0; i<mbs; i++ ) {
1236     n  = ii[1] - ii[0]; ii++;
1237     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1238     for ( j=0; j<n; j++ ) {
1239       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1240       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1241       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1242       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1243       v += 9;
1244     }
1245     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1246     z += 3; y += 3;
1247   }
1248   VecRestoreArray_Fast(xx,x);
1249   VecRestoreArray_Fast(yy,y);
1250   VecRestoreArray_Fast(zz,z);
1251   PLogFlops(18*a->nz);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNC__
1256 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1257 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1258 {
1259   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1260   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
1261   int             mbs=a->mbs,i,*idx,*ii;
1262   int             j,n;
1263 
1264   PetscFunctionBegin;
1265   VecGetArray_Fast(xx,x);
1266   VecGetArray_Fast(yy,y);
1267   VecGetArray_Fast(zz,z);
1268 
1269   idx   = a->j;
1270   v     = a->a;
1271   ii    = a->i;
1272 
1273   for ( i=0; i<mbs; i++ ) {
1274     n  = ii[1] - ii[0]; ii++;
1275     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1276     for ( j=0; j<n; j++ ) {
1277       xb = x + 4*(*idx++);
1278       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1279       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1280       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1281       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1282       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1283       v += 16;
1284     }
1285     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1286     z += 4; y += 4;
1287   }
1288   VecRestoreArray_Fast(xx,x);
1289   VecRestoreArray_Fast(yy,y);
1290   VecRestoreArray_Fast(zz,z);
1291   PLogFlops(32*a->nz);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNC__
1296 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1297 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1298 {
1299   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1300   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1301   int             mbs=a->mbs,i,*idx,*ii,j,n;
1302 
1303   PetscFunctionBegin;
1304   VecGetArray_Fast(xx,x);
1305   VecGetArray_Fast(yy,y);
1306   VecGetArray_Fast(zz,z);
1307 
1308   idx   = a->j;
1309   v     = a->a;
1310   ii    = a->i;
1311 
1312   for ( i=0; i<mbs; i++ ) {
1313     n  = ii[1] - ii[0]; ii++;
1314     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1315     for ( j=0; j<n; j++ ) {
1316       xb = x + 5*(*idx++);
1317       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1318       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1319       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1320       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1321       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1322       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1323       v += 25;
1324     }
1325     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1326     z += 5; y += 5;
1327   }
1328   VecRestoreArray_Fast(xx,x);
1329   VecRestoreArray_Fast(yy,y);
1330   VecRestoreArray_Fast(zz,z);
1331   PLogFlops(50*a->nz);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNC__
1336 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1337 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1338 {
1339   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1340   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1341   register Scalar x1,x2,x3,x4,x5,x6,x7;
1342   int             mbs=a->mbs,i,*idx,*ii,j,n;
1343 
1344   PetscFunctionBegin;
1345   VecGetArray_Fast(xx,x);
1346   VecGetArray_Fast(yy,y);
1347   VecGetArray_Fast(zz,z);
1348 
1349   idx   = a->j;
1350   v     = a->a;
1351   ii    = a->i;
1352 
1353   for ( i=0; i<mbs; i++ ) {
1354     n  = ii[1] - ii[0]; ii++;
1355     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1356     for ( j=0; j<n; j++ ) {
1357       xb = x + 7*(*idx++);
1358       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1359       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1360       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1361       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1362       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1363       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1364       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1365       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1366       v += 49;
1367     }
1368     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1369     z += 7; y += 7;
1370   }
1371   VecRestoreArray_Fast(xx,x);
1372   VecRestoreArray_Fast(yy,y);
1373   VecRestoreArray_Fast(zz,z);
1374   PLogFlops(98*a->nz);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 
1379 #undef __FUNC__
1380 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1381 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1382 {
1383   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1384   register Scalar *x,*z,*v,*xb;
1385   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1386   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1387 
1388   PetscFunctionBegin;
1389   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1390 
1391   VecGetArray_Fast(xx,x);
1392   VecGetArray_Fast(zz,z);
1393 
1394   idx   = a->j;
1395   v     = a->a;
1396   ii    = a->i;
1397 
1398 
1399   if (!a->mult_work) {
1400     k = PetscMax(a->m,a->n);
1401     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1402   }
1403   work = a->mult_work;
1404   for ( i=0; i<mbs; i++ ) {
1405     n     = ii[1] - ii[0]; ii++;
1406     ncols = n*bs;
1407     workt = work;
1408     for ( j=0; j<n; j++ ) {
1409       xb = x + bs*(*idx++);
1410       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1411       workt += bs;
1412     }
1413     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1414     v += n*bs2;
1415     z += bs;
1416   }
1417   VecRestoreArray_Fast(xx,x);
1418   VecRestoreArray_Fast(zz,z);
1419   PLogFlops(2*a->nz*bs2 );
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNC__
1424 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1425 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1426 {
1427   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1428   Scalar          *xg,*zg,*zb;
1429   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1430   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1431   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1432 
1433 
1434   PetscFunctionBegin;
1435   VecGetArray_Fast(xx,xg); x = xg;
1436   VecGetArray_Fast(zz,zg); z = zg;
1437   PetscMemzero(z,N*sizeof(Scalar));
1438 
1439   idx   = a->j;
1440   v     = a->a;
1441   ii    = a->i;
1442 
1443   switch (bs) {
1444   case 1:
1445     for ( i=0; i<mbs; i++ ) {
1446       n  = ii[1] - ii[0]; ii++;
1447       xb = x + i; x1 = xb[0];
1448       ib = idx + ai[i];
1449       for ( j=0; j<n; j++ ) {
1450         rval    = ib[j];
1451         z[rval] += *v++ * x1;
1452       }
1453     }
1454     break;
1455   case 2:
1456     for ( i=0; i<mbs; i++ ) {
1457       n  = ii[1] - ii[0]; ii++;
1458       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1459       ib = idx + ai[i];
1460       for ( j=0; j<n; j++ ) {
1461         rval      = ib[j]*2;
1462         z[rval++] += v[0]*x1 + v[1]*x2;
1463         z[rval++] += v[2]*x1 + v[3]*x2;
1464         v += 4;
1465       }
1466     }
1467     break;
1468   case 3:
1469     for ( i=0; i<mbs; i++ ) {
1470       n  = ii[1] - ii[0]; ii++;
1471       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1472       ib = idx + ai[i];
1473       for ( j=0; j<n; j++ ) {
1474         rval      = ib[j]*3;
1475         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1476         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1477         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1478         v += 9;
1479       }
1480     }
1481     break;
1482   case 4:
1483     for ( i=0; i<mbs; i++ ) {
1484       n  = ii[1] - ii[0]; ii++;
1485       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1486       ib = idx + ai[i];
1487       for ( j=0; j<n; j++ ) {
1488         rval      = ib[j]*4;
1489         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1490         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1491         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1492         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1493         v += 16;
1494       }
1495     }
1496     break;
1497   case 5:
1498     for ( i=0; i<mbs; i++ ) {
1499       n  = ii[1] - ii[0]; ii++;
1500       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1501       x4 = xb[3];   x5 = xb[4];
1502       ib = idx + ai[i];
1503       for ( j=0; j<n; j++ ) {
1504         rval      = ib[j]*5;
1505         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1506         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1507         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1508         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1509         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1510         v += 25;
1511       }
1512     }
1513     break;
1514       /* block sizes larger then 5 by 5 are handled by BLAS */
1515     default: {
1516       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1517       if (!a->mult_work) {
1518         k = PetscMax(a->m,a->n);
1519         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1520         CHKPTRQ(a->mult_work);
1521       }
1522       work = a->mult_work;
1523       for ( i=0; i<mbs; i++ ) {
1524         n     = ii[1] - ii[0]; ii++;
1525         ncols = n*bs;
1526         PetscMemzero(work,ncols*sizeof(Scalar));
1527         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1528         v += n*bs2;
1529         x += bs;
1530         workt = work;
1531         for ( j=0; j<n; j++ ) {
1532           zb = z + bs*(*idx++);
1533           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1534           workt += bs;
1535         }
1536       }
1537     }
1538   }
1539   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1540   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1541   PLogFlops(2*a->nz*a->bs2 - a->n);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNC__
1546 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1547 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1548 {
1549   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1550   Scalar          *xg,*zg,*zb;
1551   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1552   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1553   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1554 
1555   PetscFunctionBegin;
1556   VecGetArray_Fast(xx,xg); x = xg;
1557   VecGetArray_Fast(zz,zg); z = zg;
1558 
1559   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1560   else PetscMemzero(z,N*sizeof(Scalar));
1561 
1562   idx   = a->j;
1563   v     = a->a;
1564   ii    = a->i;
1565 
1566   switch (bs) {
1567   case 1:
1568     for ( i=0; i<mbs; i++ ) {
1569       n  = ii[1] - ii[0]; ii++;
1570       xb = x + i; x1 = xb[0];
1571       ib = idx + ai[i];
1572       for ( j=0; j<n; j++ ) {
1573         rval    = ib[j];
1574         z[rval] += *v++ * x1;
1575       }
1576     }
1577     break;
1578   case 2:
1579     for ( i=0; i<mbs; i++ ) {
1580       n  = ii[1] - ii[0]; ii++;
1581       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1582       ib = idx + ai[i];
1583       for ( j=0; j<n; j++ ) {
1584         rval      = ib[j]*2;
1585         z[rval++] += v[0]*x1 + v[1]*x2;
1586         z[rval++] += v[2]*x1 + v[3]*x2;
1587         v += 4;
1588       }
1589     }
1590     break;
1591   case 3:
1592     for ( i=0; i<mbs; i++ ) {
1593       n  = ii[1] - ii[0]; ii++;
1594       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1595       ib = idx + ai[i];
1596       for ( j=0; j<n; j++ ) {
1597         rval      = ib[j]*3;
1598         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1599         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1600         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1601         v += 9;
1602       }
1603     }
1604     break;
1605   case 4:
1606     for ( i=0; i<mbs; i++ ) {
1607       n  = ii[1] - ii[0]; ii++;
1608       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1609       ib = idx + ai[i];
1610       for ( j=0; j<n; j++ ) {
1611         rval      = ib[j]*4;
1612         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1613         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1614         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1615         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1616         v += 16;
1617       }
1618     }
1619     break;
1620   case 5:
1621     for ( i=0; i<mbs; i++ ) {
1622       n  = ii[1] - ii[0]; ii++;
1623       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1624       x4 = xb[3];   x5 = xb[4];
1625       ib = idx + ai[i];
1626       for ( j=0; j<n; j++ ) {
1627         rval      = ib[j]*5;
1628         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1629         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1630         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1631         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1632         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1633         v += 25;
1634       }
1635     }
1636     break;
1637       /* block sizes larger then 5 by 5 are handled by BLAS */
1638     default: {
1639       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1640       if (!a->mult_work) {
1641         k = PetscMax(a->m,a->n);
1642         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1643         CHKPTRQ(a->mult_work);
1644       }
1645       work = a->mult_work;
1646       for ( i=0; i<mbs; i++ ) {
1647         n     = ii[1] - ii[0]; ii++;
1648         ncols = n*bs;
1649         PetscMemzero(work,ncols*sizeof(Scalar));
1650         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1651         v += n*bs2;
1652         x += bs;
1653         workt = work;
1654         for ( j=0; j<n; j++ ) {
1655           zb = z + bs*(*idx++);
1656           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1657           workt += bs;
1658         }
1659       }
1660     }
1661   }
1662   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1663   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1664   PLogFlops(2*a->nz*a->bs2);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNC__
1669 #define __FUNC__ "MatGetInfo_SeqBAIJ"
1670 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1671 {
1672   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1673 
1674   PetscFunctionBegin;
1675   info->rows_global    = (double)a->m;
1676   info->columns_global = (double)a->n;
1677   info->rows_local     = (double)a->m;
1678   info->columns_local  = (double)a->n;
1679   info->block_size     = a->bs2;
1680   info->nz_allocated   = a->maxnz;
1681   info->nz_used        = a->bs2*a->nz;
1682   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1683   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1684     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1685   info->assemblies   = A->num_ass;
1686   info->mallocs      = a->reallocs;
1687   info->memory       = A->mem;
1688   if (A->factor) {
1689     info->fill_ratio_given  = A->info.fill_ratio_given;
1690     info->fill_ratio_needed = A->info.fill_ratio_needed;
1691     info->factor_mallocs    = A->info.factor_mallocs;
1692   } else {
1693     info->fill_ratio_given  = 0;
1694     info->fill_ratio_needed = 0;
1695     info->factor_mallocs    = 0;
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNC__
1701 #define __FUNC__ "MatEqual_SeqBAIJ"
1702 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1703 {
1704   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1705 
1706   PetscFunctionBegin;
1707   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1708 
1709   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1710   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1711     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1712   }
1713 
1714   /* if the a->i are the same */
1715   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1716     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1717   }
1718 
1719   /* if a->j are the same */
1720   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1721     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1722   }
1723 
1724   /* if a->a are the same */
1725   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1726     *flg = PETSC_FALSE; PetscFunctionReturn(0);
1727   }
1728   *flg = PETSC_TRUE;
1729   PetscFunctionReturn(0);
1730 
1731 }
1732 
1733 #undef __FUNC__
1734 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1735 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1738   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1739   Scalar      *x, zero = 0.0,*aa,*aa_j;
1740 
1741   PetscFunctionBegin;
1742   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1743   bs   = a->bs;
1744   aa   = a->a;
1745   ai   = a->i;
1746   aj   = a->j;
1747   ambs = a->mbs;
1748   bs2  = a->bs2;
1749 
1750   VecSet(&zero,v);
1751   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1752   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1753   for ( i=0; i<ambs; i++ ) {
1754     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1755       if (aj[j] == i) {
1756         row  = i*bs;
1757         aa_j = aa+j*bs2;
1758         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1759         break;
1760       }
1761     }
1762   }
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNC__
1767 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1768 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1769 {
1770   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1771   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1772   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1773 
1774   PetscFunctionBegin;
1775   ai  = a->i;
1776   aj  = a->j;
1777   aa  = a->a;
1778   m   = a->m;
1779   n   = a->n;
1780   bs  = a->bs;
1781   mbs = a->mbs;
1782   bs2 = a->bs2;
1783   if (ll) {
1784     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1785     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1786     for ( i=0; i<mbs; i++ ) { /* for each block row */
1787       M  = ai[i+1] - ai[i];
1788       li = l + i*bs;
1789       v  = aa + bs2*ai[i];
1790       for ( j=0; j<M; j++ ) { /* for each block */
1791         for ( k=0; k<bs2; k++ ) {
1792           (*v++) *= li[k%bs];
1793         }
1794       }
1795     }
1796   }
1797 
1798   if (rr) {
1799     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1800     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1801     for ( i=0; i<mbs; i++ ) { /* for each block row */
1802       M  = ai[i+1] - ai[i];
1803       v  = aa + bs2*ai[i];
1804       for ( j=0; j<M; j++ ) { /* for each block */
1805         ri = r + bs*aj[ai[i]+j];
1806         for ( k=0; k<bs; k++ ) {
1807           x = ri[k];
1808           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1809         }
1810       }
1811     }
1812   }
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 
1817 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1818 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1819 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1820 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1821 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1822 
1823 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1824 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1825 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1826 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1827 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1828 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1829 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1830 
1831 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1832 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1833 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1834 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1835 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1836 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1837 
1838 #undef __FUNC__
1839 #define __FUNC__ "MatNorm_SeqBAIJ"
1840 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1841 {
1842   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1843   Scalar      *v = a->a;
1844   double      sum = 0.0;
1845   int         i,nz=a->nz,bs2=a->bs2;
1846 
1847   PetscFunctionBegin;
1848   if (type == NORM_FROBENIUS) {
1849     for (i=0; i< bs2*nz; i++ ) {
1850 #if defined(USE_PETSC_COMPLEX)
1851       sum += real(conj(*v)*(*v)); v++;
1852 #else
1853       sum += (*v)*(*v); v++;
1854 #endif
1855     }
1856     *norm = sqrt(sum);
1857   }
1858   else {
1859     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
1865 /*
1866      note: This can only work for identity for row and col. It would
1867    be good to check this and otherwise generate an error.
1868 */
1869 #undef __FUNC__
1870 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1871 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1872 {
1873   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1874   Mat         outA;
1875   int         ierr;
1876 
1877   PetscFunctionBegin;
1878   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1879 
1880   outA          = inA;
1881   inA->factor   = FACTOR_LU;
1882   a->row        = row;
1883   a->col        = col;
1884 
1885   if (!a->solve_work) {
1886     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1887     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1888   }
1889 
1890   if (!a->diag) {
1891     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1892   }
1893   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1894 
1895   /*
1896       Blocksize 4 has a special faster solver for ILU(0) factorization
1897     with natural ordering
1898   */
1899   if (a->bs == 4) {
1900     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
1901   }
1902 
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNC__
1907 #define __FUNC__ "MatScale_SeqBAIJ"
1908 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1909 {
1910   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1911   int         one = 1, totalnz = a->bs2*a->nz;
1912 
1913   PetscFunctionBegin;
1914   BLscal_( &totalnz, alpha, a->a, &one );
1915   PLogFlops(totalnz);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNC__
1920 #define __FUNC__ "MatGetValues_SeqBAIJ"
1921 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1922 {
1923   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1924   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1925   int        *ai = a->i, *ailen = a->ilen;
1926   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1927   Scalar     *ap, *aa = a->a, zero = 0.0;
1928 
1929   PetscFunctionBegin;
1930   for ( k=0; k<m; k++ ) { /* loop over rows */
1931     row  = im[k]; brow = row/bs;
1932     if (row < 0) SETERRQ(1,0,"Negative row");
1933     if (row >= a->m) SETERRQ(1,0,"Row too large");
1934     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1935     nrow = ailen[brow];
1936     for ( l=0; l<n; l++ ) { /* loop over columns */
1937       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1938       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1939       col  = in[l] ;
1940       bcol = col/bs;
1941       cidx = col%bs;
1942       ridx = row%bs;
1943       high = nrow;
1944       low  = 0; /* assume unsorted */
1945       while (high-low > 5) {
1946         t = (low+high)/2;
1947         if (rp[t] > bcol) high = t;
1948         else             low  = t;
1949       }
1950       for ( i=low; i<high; i++ ) {
1951         if (rp[i] > bcol) break;
1952         if (rp[i] == bcol) {
1953           *v++ = ap[bs2*i+bs*cidx+ridx];
1954           goto finished;
1955         }
1956       }
1957       *v++ = zero;
1958       finished:;
1959     }
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNC__
1965 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
1966 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1967 {
1968   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1969 
1970   PetscFunctionBegin;
1971   *bs = baij->bs;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 /* idx should be of length atlease bs */
1976 #undef __FUNC__
1977 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1978 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1979 {
1980   int i,row;
1981 
1982   PetscFunctionBegin;
1983   row = idx[0];
1984   if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1985 
1986   for ( i=1; i<bs; i++ ) {
1987     if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1988   }
1989   *flg = PETSC_TRUE;
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNC__
1994 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1995 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1996 {
1997   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1998   IS          is_local;
1999   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
2000   PetscTruth  flg;
2001   Scalar      zero = 0.0,*aa;
2002 
2003   PetscFunctionBegin;
2004   /* Make a copy of the IS and  sort it */
2005   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
2006   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
2007   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
2008   ierr = ISSort(is_local); CHKERRQ(ierr);
2009   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
2010 
2011   i = 0;
2012   while (i < is_n) {
2013     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
2014     flg = PETSC_FALSE;
2015     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
2016     if (flg) { /* There exists a block of rows to be Zerowed */
2017       baij->ilen[rows[i]/bs] = 0;
2018       i += bs;
2019     } else { /* Zero out only the requested row */
2020       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
2021       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
2022       for ( j=0; j<count; j++ ) {
2023         aa[0] = zero;
2024         aa+=bs;
2025       }
2026       i++;
2027     }
2028   }
2029   if (diag) {
2030     for ( j=0; j<is_n; j++ ) {
2031       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
2032     }
2033   }
2034   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
2035   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2036   ierr = ISDestroy(is_local); CHKERRQ(ierr);
2037   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2038 
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNC__
2043 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2044 int MatPrintHelp_SeqBAIJ(Mat A)
2045 {
2046   static int called = 0;
2047   MPI_Comm   comm = A->comm;
2048 
2049   PetscFunctionBegin;
2050   if (called) {PetscFunctionReturn(0);} else called = 1;
2051   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2052   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 /* -------------------------------------------------------------------*/
2057 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
2058        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2059        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2060        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
2061        MatSolve_SeqBAIJ_N,0,
2062        0,0,
2063        MatLUFactor_SeqBAIJ,0,
2064        0,
2065        MatTranspose_SeqBAIJ,
2066        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2067        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2068        0,MatAssemblyEnd_SeqBAIJ,
2069        0,
2070        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2071        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2072        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2073        MatILUFactorSymbolic_SeqBAIJ,0,
2074        0,0,
2075        MatConvertSameType_SeqBAIJ,0,0,
2076        MatILUFactor_SeqBAIJ,0,0,
2077        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2078        MatGetValues_SeqBAIJ,0,
2079        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2080        0,0,0,MatGetBlockSize_SeqBAIJ,
2081        MatGetRowIJ_SeqBAIJ,
2082        MatRestoreRowIJ_SeqBAIJ,
2083        0,0,0,0,0,0,
2084        MatSetValuesBlocked_SeqBAIJ};
2085 
2086 #undef __FUNC__
2087 #define __FUNC__ "MatCreateSeqBAIJ"
2088 /*@C
2089    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2090    compressed row) format.  For good matrix assembly performance the
2091    user should preallocate the matrix storage by setting the parameter nz
2092    (or the array nzz).  By setting these parameters accurately, performance
2093    during matrix assembly can be increased by more than a factor of 50.
2094 
2095    Input Parameters:
2096 .  comm - MPI communicator, set to PETSC_COMM_SELF
2097 .  bs - size of block
2098 .  m - number of rows
2099 .  n - number of columns
2100 .  nz - number of block nonzeros per block row (same for all rows)
2101 .  nzz - array containing the number of block nonzeros in the various block rows
2102          (possibly different for each block row) or PETSC_NULL
2103 
2104    Output Parameter:
2105 .  A - the matrix
2106 
2107    Options Database Keys:
2108 $    -mat_no_unroll - uses code that does not unroll the loops in the
2109 $                     block calculations (much slower)
2110 $    -mat_block_size - size of the blocks to use
2111 
2112    Notes:
2113    The block AIJ format is fully compatible with standard Fortran 77
2114    storage.  That is, the stored row and column indices can begin at
2115    either one (as in Fortran) or zero.  See the users' manual for details.
2116 
2117    Specify the preallocated storage with either nz or nnz (not both).
2118    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2119    allocation.  For additional details, see the users manual chapter on
2120    matrices.
2121 
2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2123 @*/
2124 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2125 {
2126   Mat         B;
2127   Mat_SeqBAIJ *b;
2128   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2129 
2130   PetscFunctionBegin;
2131   MPI_Comm_size(comm,&size);
2132   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2133 
2134   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2135 
2136   if (mbs*bs!=m || nbs*bs!=n) {
2137     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
2138   }
2139 
2140   *A = 0;
2141   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
2142   PLogObjectCreate(B);
2143   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2144   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2145   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2146   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2147   if (!flg) {
2148     switch (bs) {
2149     case 1:
2150       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2151       B->ops.solve           = MatSolve_SeqBAIJ_1;
2152       B->ops.mult            = MatMult_SeqBAIJ_1;
2153       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2154       break;
2155     case 2:
2156       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2157       B->ops.solve           = MatSolve_SeqBAIJ_2;
2158       B->ops.mult            = MatMult_SeqBAIJ_2;
2159       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2160       break;
2161     case 3:
2162       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2163       B->ops.solve           = MatSolve_SeqBAIJ_3;
2164       B->ops.mult            = MatMult_SeqBAIJ_3;
2165       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2166       break;
2167     case 4:
2168       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2169       B->ops.solve           = MatSolve_SeqBAIJ_4;
2170       B->ops.mult            = MatMult_SeqBAIJ_4;
2171       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2172       break;
2173     case 5:
2174       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2175       B->ops.solve           = MatSolve_SeqBAIJ_5;
2176       B->ops.mult            = MatMult_SeqBAIJ_5;
2177       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2178       break;
2179     case 7:
2180       B->ops.mult            = MatMult_SeqBAIJ_7;
2181       B->ops.solve           = MatSolve_SeqBAIJ_7;
2182       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2183       break;
2184     }
2185   }
2186   B->destroy          = MatDestroy_SeqBAIJ;
2187   B->view             = MatView_SeqBAIJ;
2188   B->factor           = 0;
2189   B->lupivotthreshold = 1.0;
2190   B->mapping          = 0;
2191   b->row              = 0;
2192   b->col              = 0;
2193   b->reallocs         = 0;
2194 
2195   b->m       = m; B->m = m; B->M = m;
2196   b->n       = n; B->n = n; B->N = n;
2197   b->mbs     = mbs;
2198   b->nbs     = nbs;
2199   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2200   if (nnz == PETSC_NULL) {
2201     if (nz == PETSC_DEFAULT) nz = 5;
2202     else if (nz <= 0)        nz = 1;
2203     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2204     nz = nz*mbs;
2205   } else {
2206     nz = 0;
2207     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2208   }
2209 
2210   /* allocate the matrix space */
2211   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2212   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2213   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2214   b->j  = (int *) (b->a + nz*bs2);
2215   PetscMemzero(b->j,nz*sizeof(int));
2216   b->i  = b->j + nz;
2217   b->singlemalloc = 1;
2218 
2219   b->i[0] = 0;
2220   for (i=1; i<mbs+1; i++) {
2221     b->i[i] = b->i[i-1] + b->imax[i-1];
2222   }
2223 
2224   /* b->ilen will count nonzeros in each block row so far. */
2225   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2226   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2227   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2228 
2229   b->bs               = bs;
2230   b->bs2              = bs2;
2231   b->mbs              = mbs;
2232   b->nz               = 0;
2233   b->maxnz            = nz*bs2;
2234   b->sorted           = 0;
2235   b->roworiented      = 1;
2236   b->nonew            = 0;
2237   b->diag             = 0;
2238   b->solve_work       = 0;
2239   b->mult_work        = 0;
2240   b->spptr            = 0;
2241   B->info.nz_unneeded = (double)b->maxnz;
2242 
2243   *A = B;
2244   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2245   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNC__
2250 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2251 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2252 {
2253   Mat         C;
2254   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2255   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2256 
2257   PetscFunctionBegin;
2258   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2259 
2260   *B = 0;
2261   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
2262   PLogObjectCreate(C);
2263   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2264   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2265   C->destroy    = MatDestroy_SeqBAIJ;
2266   C->view       = MatView_SeqBAIJ;
2267   C->factor     = A->factor;
2268   c->row        = 0;
2269   c->col        = 0;
2270   C->assembled  = PETSC_TRUE;
2271 
2272   c->m = C->m   = a->m;
2273   c->n = C->n   = a->n;
2274   C->M          = a->m;
2275   C->N          = a->n;
2276 
2277   c->bs         = a->bs;
2278   c->bs2        = a->bs2;
2279   c->mbs        = a->mbs;
2280   c->nbs        = a->nbs;
2281 
2282   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2283   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2284   for ( i=0; i<mbs; i++ ) {
2285     c->imax[i] = a->imax[i];
2286     c->ilen[i] = a->ilen[i];
2287   }
2288 
2289   /* allocate the matrix space */
2290   c->singlemalloc = 1;
2291   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2292   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2293   c->j  = (int *) (c->a + nz*bs2);
2294   c->i  = c->j + nz;
2295   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2296   if (mbs > 0) {
2297     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2298     if (cpvalues == COPY_VALUES) {
2299       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2300     }
2301   }
2302 
2303   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2304   c->sorted      = a->sorted;
2305   c->roworiented = a->roworiented;
2306   c->nonew       = a->nonew;
2307 
2308   if (a->diag) {
2309     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2310     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2311     for ( i=0; i<mbs; i++ ) {
2312       c->diag[i] = a->diag[i];
2313     }
2314   }
2315   else c->diag          = 0;
2316   c->nz                 = a->nz;
2317   c->maxnz              = a->maxnz;
2318   c->solve_work         = 0;
2319   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2320   c->mult_work          = 0;
2321   *B = C;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNC__
2326 #define __FUNC__ "MatLoad_SeqBAIJ"
2327 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2328 {
2329   Mat_SeqBAIJ  *a;
2330   Mat          B;
2331   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2332   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2333   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2334   int          *masked, nmask,tmp,bs2,ishift;
2335   Scalar       *aa;
2336   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2337 
2338   PetscFunctionBegin;
2339   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2340   bs2  = bs*bs;
2341 
2342   MPI_Comm_size(comm,&size);
2343   if (size > 1) SETERRQ(1,0,"view must have one processor");
2344   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2345   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
2346   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2347   M = header[1]; N = header[2]; nz = header[3];
2348 
2349   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2350 
2351   /*
2352      This code adds extra rows to make sure the number of rows is
2353     divisible by the blocksize
2354   */
2355   mbs        = M/bs;
2356   extra_rows = bs - M + bs*(mbs);
2357   if (extra_rows == bs) extra_rows = 0;
2358   else                  mbs++;
2359   if (extra_rows) {
2360     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2361   }
2362 
2363   /* read in row lengths */
2364   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2365   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2366   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2367 
2368   /* read in column indices */
2369   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2370   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
2371   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2372 
2373   /* loop over row lengths determining block row lengths */
2374   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2375   PetscMemzero(browlengths,mbs*sizeof(int));
2376   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2377   PetscMemzero(mask,mbs*sizeof(int));
2378   masked = mask + mbs;
2379   rowcount = 0; nzcount = 0;
2380   for ( i=0; i<mbs; i++ ) {
2381     nmask = 0;
2382     for ( j=0; j<bs; j++ ) {
2383       kmax = rowlengths[rowcount];
2384       for ( k=0; k<kmax; k++ ) {
2385         tmp = jj[nzcount++]/bs;
2386         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2387       }
2388       rowcount++;
2389     }
2390     browlengths[i] += nmask;
2391     /* zero out the mask elements we set */
2392     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2393   }
2394 
2395   /* create our matrix */
2396   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
2397   B = *A;
2398   a = (Mat_SeqBAIJ *) B->data;
2399 
2400   /* set matrix "i" values */
2401   a->i[0] = 0;
2402   for ( i=1; i<= mbs; i++ ) {
2403     a->i[i]      = a->i[i-1] + browlengths[i-1];
2404     a->ilen[i-1] = browlengths[i-1];
2405   }
2406   a->nz         = 0;
2407   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2408 
2409   /* read in nonzero values */
2410   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2411   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
2412   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2413 
2414   /* set "a" and "j" values into matrix */
2415   nzcount = 0; jcount = 0;
2416   for ( i=0; i<mbs; i++ ) {
2417     nzcountb = nzcount;
2418     nmask    = 0;
2419     for ( j=0; j<bs; j++ ) {
2420       kmax = rowlengths[i*bs+j];
2421       for ( k=0; k<kmax; k++ ) {
2422         tmp = jj[nzcount++]/bs;
2423 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2424       }
2425       rowcount++;
2426     }
2427     /* sort the masked values */
2428     PetscSortInt(nmask,masked);
2429 
2430     /* set "j" values into matrix */
2431     maskcount = 1;
2432     for ( j=0; j<nmask; j++ ) {
2433       a->j[jcount++]  = masked[j];
2434       mask[masked[j]] = maskcount++;
2435     }
2436     /* set "a" values into matrix */
2437     ishift = bs2*a->i[i];
2438     for ( j=0; j<bs; j++ ) {
2439       kmax = rowlengths[i*bs+j];
2440       for ( k=0; k<kmax; k++ ) {
2441         tmp    = jj[nzcountb]/bs ;
2442         block  = mask[tmp] - 1;
2443         point  = jj[nzcountb] - bs*tmp;
2444         idx    = ishift + bs2*block + j + bs*point;
2445         a->a[idx] = aa[nzcountb++];
2446       }
2447     }
2448     /* zero out the mask elements we set */
2449     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2450   }
2451   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2452 
2453   PetscFree(rowlengths);
2454   PetscFree(browlengths);
2455   PetscFree(aa);
2456   PetscFree(jj);
2457   PetscFree(mask);
2458 
2459   B->assembled = PETSC_TRUE;
2460 
2461   ierr = MatView_Private(B); CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 
2466 
2467