xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2b36279985d181bf1ae28d424577ae2c2fbebdf4)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.95 1997/03/26 01:36:13 bsmith Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 
10 #include "pinclude/pviewer.h"
11 #include "sys.h"
12 #include "src/mat/impls/baij/seq/baij.h"
13 #include "src/vec/vecimpl.h"
14 #include "src/inline/spops.h"
15 #include "petsc.h"
16 
17 
18 /*
19      Adds diagonal pointers to sparse matrix structure.
20 */
21 
22 #undef __FUNC__
23 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24 int MatMarkDiag_SeqBAIJ(Mat A)
25 {
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
27   int         i,j, *diag, m = a->mbs;
28 
29   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30   PLogObjectMemory(A,(m+1)*sizeof(int));
31   for ( i=0; i<m; i++ ) {
32     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33       if (a->j[j] == i) {
34         diag[i] = j;
35         break;
36       }
37     }
38   }
39   a->diag = diag;
40   return 0;
41 }
42 
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert1;
418         }
419       }
420       if (nonew == 1) goto noinsert1;
421       else if (nonew == -1) SETERRQ(1,1,"Inserting a new nonzero in the matrix");
422       if (nrow >= rmax) {
423         /* there is no extra room in row, therefore enlarge */
424         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
425         Scalar *new_a;
426 
427         /* malloc new storage space */
428         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
429         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
430         new_j   = (int *) (new_a + bs2*new_nz);
431         new_i   = new_j + new_nz;
432 
433         /* copy over old data into new slots */
434         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
435         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
436         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
437         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
438         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
439                                                            len*sizeof(int));
440         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
441         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
442         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
443                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
444         /* free up old matrix storage */
445         PetscFree(a->a);
446         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
447         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
448         a->singlemalloc = 1;
449 
450         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
451         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
452         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
453         a->maxnz += bs2*CHUNKSIZE;
454         a->reallocs++;
455         a->nz++;
456       }
457       N = nrow++ - 1;
458       /* shift up all the later entries in this row */
459       for ( ii=N; ii>=i; ii-- ) {
460         rp[ii+1] = rp[ii];
461         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
462       }
463       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
464       rp[i]                      = bcol;
465       ap[bs2*i + bs*cidx + ridx] = value;
466       noinsert1:;
467       low = i;
468     }
469     ailen[brow] = nrow;
470   }
471   return 0;
472 }
473 
474 #undef __FUNC__
475 #define __FUNC__ "MatSetValues_SeqBAIJ"
476 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
477 {
478   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
479   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
480   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
481   int         roworiented=a->roworiented;
482   int         *aj=a->j,nonew=a->nonew;
483   int          bs2=a->bs2,bs=a->bs,stepval;
484   Scalar      *ap,*value=v,*aa=a->a,*bap;
485 
486   if (roworiented) {
487     stepval = (n-1)*bs;
488   } else {
489     stepval = (m-1)*bs;
490   }
491   for ( k=0; k<m; k++ ) { /* loop over added rows */
492     row  = im[k];
493 #if defined(PETSC_BOPT_g)
494     if (row < 0) SETERRQ(1,0,"Negative row");
495     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
496 #endif
497     rp   = aj + ai[row];
498     ap   = aa + bs2*ai[row];
499     rmax = imax[row];
500     nrow = ailen[row];
501     low  = 0;
502     for ( l=0; l<n; l++ ) { /* loop over added columns */
503 #if defined(PETSC_BOPT_g)
504       if (in[l] < 0) SETERRQ(1,0,"Negative column");
505       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
506 #endif
507       col = in[l];
508       if (roworiented) {
509         value = v + k*(stepval+bs)*bs + l*bs;
510       } else {
511         value = v + l*(stepval+bs)*bs + k*bs;
512       }
513       if (!sorted) low = 0; high = nrow;
514       while (high-low > 7) {
515         t = (low+high)/2;
516         if (rp[t] > col) high = t;
517         else             low  = t;
518       }
519       for ( i=low; i<high; i++ ) {
520         if (rp[i] > col) break;
521         if (rp[i] == col) {
522           bap  = ap +  bs2*i;
523           if (roworiented) {
524             if (is == ADD_VALUES) {
525               for ( ii=0; ii<bs; ii++,value+=stepval )
526                 for (jj=ii; jj<bs2; jj+=bs )
527                   bap[jj] += *value++;
528             } else {
529               for ( ii=0; ii<bs; ii++,value+=stepval )
530                 for (jj=ii; jj<bs2; jj+=bs )
531                   bap[jj] = *value++;
532             }
533           } else {
534             if (is == ADD_VALUES) {
535               for ( ii=0; ii<bs; ii++,value+=stepval )
536                 for (jj=0; jj<bs; jj++ )
537                   *bap++ += *value++;
538             } else {
539               for ( ii=0; ii<bs; ii++,value+=stepval )
540                 for (jj=0; jj<bs; jj++ )
541                   *bap++  = *value++;
542             }
543           }
544           goto noinsert2;
545         }
546       }
547       if (nonew) goto noinsert2;
548       if (nrow >= rmax) {
549         /* there is no extra room in row, therefore enlarge */
550         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
551         Scalar *new_a;
552 
553         /* malloc new storage space */
554         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
555         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
556         new_j   = (int *) (new_a + bs2*new_nz);
557         new_i   = new_j + new_nz;
558 
559         /* copy over old data into new slots */
560         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
561         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
562         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
563         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
564         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
565                                                            len*sizeof(int));
566         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
567         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
568         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
569                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
570         /* free up old matrix storage */
571         PetscFree(a->a);
572         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
573         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
574         a->singlemalloc = 1;
575 
576         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
577         rmax = imax[row] = imax[row] + CHUNKSIZE;
578         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
579         a->maxnz += bs2*CHUNKSIZE;
580         a->reallocs++;
581         a->nz++;
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for ( ii=N; ii>=i; ii-- ) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
588       }
589       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
590       rp[i] = col;
591       bap   = ap +  bs2*i;
592       if (roworiented) {
593         for ( ii=0; ii<bs; ii++,value+=stepval)
594           for (jj=ii; jj<bs2; jj+=bs )
595             bap[jj] = *value++;
596       } else {
597         for ( ii=0; ii<bs; ii++,value+=stepval )
598           for (jj=0; jj<bs; jj++ )
599             *bap++  = *value++;
600       }
601       noinsert2:;
602       low = i;
603     }
604     ailen[row] = nrow;
605   }
606   return 0;
607 }
608 
609 #undef __FUNC__
610 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
611 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
612 {
613   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
614   *m = a->m; *n = a->n;
615   return 0;
616 }
617 
618 #undef __FUNC__
619 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
620 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
621 {
622   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
623   *m = 0; *n = a->m;
624   return 0;
625 }
626 
627 #undef __FUNC__
628 #define __FUNC__ "MatGetRow_SeqBAIJ"
629 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
630 {
631   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
632   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
633   Scalar      *aa,*v_i,*aa_i;
634 
635   bs  = a->bs;
636   ai  = a->i;
637   aj  = a->j;
638   aa  = a->a;
639   bs2 = a->bs2;
640 
641   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
642 
643   bn  = row/bs;   /* Block number */
644   bp  = row % bs; /* Block Position */
645   M   = ai[bn+1] - ai[bn];
646   *nz = bs*M;
647 
648   if (v) {
649     *v = 0;
650     if (*nz) {
651       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
652       for ( i=0; i<M; i++ ) { /* for each block in the block row */
653         v_i  = *v + i*bs;
654         aa_i = aa + bs2*(ai[bn] + i);
655         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
656       }
657     }
658   }
659 
660   if (idx) {
661     *idx = 0;
662     if (*nz) {
663       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
664       for ( i=0; i<M; i++ ) { /* for each block in the block row */
665         idx_i = *idx + i*bs;
666         itmp  = bs*aj[ai[bn] + i];
667         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
668       }
669     }
670   }
671   return 0;
672 }
673 
674 #undef __FUNC__
675 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
676 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
677 {
678   if (idx) {if (*idx) PetscFree(*idx);}
679   if (v)   {if (*v)   PetscFree(*v);}
680   return 0;
681 }
682 
683 #undef __FUNC__
684 #define __FUNC__ "MatTranspose_SeqBAIJ"
685 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
686 {
687   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
688   Mat         C;
689   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
690   int         *rows,*cols,bs2=a->bs2;
691   Scalar      *array=a->a;
692 
693   if (B==PETSC_NULL && mbs!=nbs)
694     SETERRQ(1,0,"Square matrix only for in-place");
695   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
696   PetscMemzero(col,(1+nbs)*sizeof(int));
697 
698   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
699   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
700   PetscFree(col);
701   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
702   cols = rows + bs;
703   for ( i=0; i<mbs; i++ ) {
704     cols[0] = i*bs;
705     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
706     len = ai[i+1] - ai[i];
707     for ( j=0; j<len; j++ ) {
708       rows[0] = (*aj++)*bs;
709       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
710       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
711       array += bs2;
712     }
713   }
714   PetscFree(rows);
715 
716   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
717   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
718 
719   if (B != PETSC_NULL) {
720     *B = C;
721   } else {
722     /* This isn't really an in-place transpose */
723     PetscFree(a->a);
724     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
725     if (a->diag) PetscFree(a->diag);
726     if (a->ilen) PetscFree(a->ilen);
727     if (a->imax) PetscFree(a->imax);
728     if (a->solve_work) PetscFree(a->solve_work);
729     PetscFree(a);
730     PetscMemcpy(A,C,sizeof(struct _Mat));
731     PetscHeaderDestroy(C);
732   }
733   return 0;
734 }
735 
736 
737 #undef __FUNC__
738 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
739 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
740 {
741   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
742   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
743   int        m = a->m,*ip, N, *ailen = a->ilen;
744   int        mbs = a->mbs, bs2 = a->bs2,rmax;
745   Scalar     *aa = a->a, *ap;
746 
747   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
748 
749   rmax = ailen[0];
750   for ( i=1; i<mbs; i++ ) {
751     /* move each row back by the amount of empty slots (fshift) before it*/
752     fshift += imax[i-1] - ailen[i-1];
753     rmax   = PetscMax(rmax,ailen[i]);
754     if (fshift) {
755       ip = aj + ai[i]; ap = aa + bs2*ai[i];
756       N = ailen[i];
757       for ( j=0; j<N; j++ ) {
758         ip[j-fshift] = ip[j];
759         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
760       }
761     }
762     ai[i] = ai[i-1] + ailen[i-1];
763   }
764   if (mbs) {
765     fshift += imax[mbs-1] - ailen[mbs-1];
766     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
767   }
768   /* reset ilen and imax for each row */
769   for ( i=0; i<mbs; i++ ) {
770     ailen[i] = imax[i] = ai[i+1] - ai[i];
771   }
772   a->nz = ai[mbs];
773 
774   /* diagonals may have moved, so kill the diagonal pointers */
775   if (fshift && a->diag) {
776     PetscFree(a->diag);
777     PLogObjectMemory(A,-(m+1)*sizeof(int));
778     a->diag = 0;
779   }
780   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
781            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
782   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
783            a->reallocs);
784   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
785   a->reallocs          = 0;
786   A->info.nz_unneeded  = (double)fshift*bs2;
787 
788   return 0;
789 }
790 
791 #undef __FUNC__
792 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
793 int MatZeroEntries_SeqBAIJ(Mat A)
794 {
795   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
796   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
797   return 0;
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
802 int MatDestroy_SeqBAIJ(PetscObject obj)
803 {
804   Mat         A  = (Mat) obj;
805   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
806   int         ierr;
807 
808 #if defined(PETSC_LOG)
809   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
810 #endif
811   PetscFree(a->a);
812   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
813   if (a->diag) PetscFree(a->diag);
814   if (a->ilen) PetscFree(a->ilen);
815   if (a->imax) PetscFree(a->imax);
816   if (a->solve_work) PetscFree(a->solve_work);
817   if (a->mult_work) PetscFree(a->mult_work);
818   PetscFree(a);
819   if (A->mapping) {
820     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
821   }
822   PLogObjectDestroy(A);
823   PetscHeaderDestroy(A);
824   return 0;
825 }
826 
827 #undef __FUNC__
828 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
829 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
830 {
831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
832   if      (op == MAT_ROW_ORIENTED)               a->roworiented = 1;
833   else if (op == MAT_COLUMN_ORIENTED)            a->roworiented = 0;
834   else if (op == MAT_COLUMNS_SORTED)             a->sorted      = 1;
835   else if (op == MAT_COLUMNS_UNSORTED)           a->sorted      = 0;
836   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)   a->nonew       = 1;
837   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew       = -1;
838   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)  a->nonew       = 0;
839   else if (op == MAT_ROWS_SORTED ||
840            op == MAT_ROWS_UNSORTED ||
841            op == MAT_SYMMETRIC ||
842            op == MAT_STRUCTURALLY_SYMMETRIC ||
843            op == MAT_YES_NEW_DIAGONALS ||
844            op == MAT_IGNORE_OFF_PROC_ENTRIES)
845     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
846   else if (op == MAT_NO_NEW_DIAGONALS)
847     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
848   else
849     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
850   return 0;
851 }
852 
853 
854 /* -------------------------------------------------------*/
855 /* Should check that shapes of vectors and matrices match */
856 /* -------------------------------------------------------*/
857 #include "pinclude/plapack.h"
858 
859 #undef __FUNC__
860 #define __FUNC__ "MatMult_SeqBAIJ_1"
861 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
862 {
863   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
864   register Scalar *x,*z,*v,sum;
865   int             mbs=a->mbs,i,*idx,*ii,n;
866 
867   VecGetArray_Fast(xx,x);
868   VecGetArray_Fast(zz,z);
869 
870   idx   = a->j;
871   v     = a->a;
872   ii    = a->i;
873 
874   for ( i=0; i<mbs; i++ ) {
875     n    = ii[1] - ii[0]; ii++;
876     sum  = 0.0;
877     while (n--) sum += *v++ * x[*idx++];
878     z[i] = sum;
879   }
880   VecRestoreArray_Fast(xx,x);
881   VecRestoreArray_Fast(zz,z);
882   PLogFlops(2*a->nz - a->m);
883   return 0;
884 }
885 
886 #undef __FUNC__
887 #define __FUNC__ "MatMult_SeqBAIJ_2"
888 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
889 {
890   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
891   register Scalar *x,*z,*v,*xb,sum1,sum2;
892   register Scalar x1,x2;
893   int             mbs=a->mbs,i,*idx,*ii;
894   int             j,n;
895 
896   VecGetArray_Fast(xx,x);
897   VecGetArray_Fast(zz,z);
898 
899   idx   = a->j;
900   v     = a->a;
901   ii    = a->i;
902 
903   for ( i=0; i<mbs; i++ ) {
904     n  = ii[1] - ii[0]; ii++;
905     sum1 = 0.0; sum2 = 0.0;
906     for ( j=0; j<n; j++ ) {
907       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
908       sum1 += v[0]*x1 + v[2]*x2;
909       sum2 += v[1]*x1 + v[3]*x2;
910       v += 4;
911     }
912     z[0] = sum1; z[1] = sum2;
913     z += 2;
914   }
915   VecRestoreArray_Fast(xx,x);
916   VecRestoreArray_Fast(zz,z);
917   PLogFlops(4*a->nz - a->m);
918   return 0;
919 }
920 
921 #undef __FUNC__
922 #define __FUNC__ "MatMult_SeqBAIJ_3"
923 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
924 {
925   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
926   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
927   int             mbs=a->mbs,i,*idx,*ii,j,n;
928 
929   VecGetArray_Fast(xx,x);
930   VecGetArray_Fast(zz,z);
931 
932   idx   = a->j;
933   v     = a->a;
934   ii    = a->i;
935 
936   for ( i=0; i<mbs; i++ ) {
937     n  = ii[1] - ii[0]; ii++;
938     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
939     for ( j=0; j<n; j++ ) {
940       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
941       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
942       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
943       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
944       v += 9;
945     }
946     z[0] = sum1; z[1] = sum2; z[2] = sum3;
947     z += 3;
948   }
949   VecRestoreArray_Fast(xx,x);
950   VecRestoreArray_Fast(zz,z);
951   PLogFlops(18*a->nz - a->m);
952   return 0;
953 }
954 
955 #undef __FUNC__
956 #define __FUNC__ "MatMult_SeqBAIJ_4"
957 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
958 {
959   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
960   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
961   register Scalar x1,x2,x3,x4;
962   int             mbs=a->mbs,i,*idx,*ii;
963   int             j,n;
964 
965   VecGetArray_Fast(xx,x);
966   VecGetArray_Fast(zz,z);
967 
968   idx   = a->j;
969   v     = a->a;
970   ii    = a->i;
971 
972   for ( i=0; i<mbs; i++ ) {
973     n  = ii[1] - ii[0]; ii++;
974     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
975     for ( j=0; j<n; j++ ) {
976       xb = x + 4*(*idx++);
977       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
978       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
979       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
980       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
981       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
982       v += 16;
983     }
984     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
985     z += 4;
986   }
987   VecRestoreArray_Fast(xx,x);
988   VecRestoreArray_Fast(zz,z);
989   PLogFlops(32*a->nz - a->m);
990   return 0;
991 }
992 
993 #undef __FUNC__
994 #define __FUNC__ "MatMult_SeqBAIJ_5"
995 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
996 {
997   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
998   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
999   register Scalar x1,x2,x3,x4,x5;
1000   int             mbs=a->mbs,i,*idx,*ii,j,n;
1001 
1002   VecGetArray_Fast(xx,x);
1003   VecGetArray_Fast(zz,z);
1004 
1005   idx   = a->j;
1006   v     = a->a;
1007   ii    = a->i;
1008 
1009   for ( i=0; i<mbs; i++ ) {
1010     n  = ii[1] - ii[0]; ii++;
1011     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1012     for ( j=0; j<n; j++ ) {
1013       xb = x + 5*(*idx++);
1014       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1015       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1016       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1017       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1018       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1019       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1020       v += 25;
1021     }
1022     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1023     z += 5;
1024   }
1025   VecRestoreArray_Fast(xx,x);
1026   VecRestoreArray_Fast(zz,z);
1027   PLogFlops(50*a->nz - a->m);
1028   return 0;
1029 }
1030 
1031 #undef __FUNC__
1032 #define __FUNC__ "MatMult_SeqBAIJ_7"
1033 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1034 {
1035   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1036   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1037   register Scalar x1,x2,x3,x4,x5,x6,x7;
1038   int             mbs=a->mbs,i,*idx,*ii,j,n;
1039 
1040   VecGetArray_Fast(xx,x);
1041   VecGetArray_Fast(zz,z);
1042 
1043   idx   = a->j;
1044   v     = a->a;
1045   ii    = a->i;
1046 
1047   for ( i=0; i<mbs; i++ ) {
1048     n  = ii[1] - ii[0]; ii++;
1049     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1050     for ( j=0; j<n; j++ ) {
1051       xb = x + 7*(*idx++);
1052       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1053       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1054       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1055       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1056       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1057       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1058       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1059       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1060       v += 49;
1061     }
1062     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1063     z += 7;
1064   }
1065 
1066   VecRestoreArray_Fast(xx,x);
1067   VecRestoreArray_Fast(zz,z);
1068   PLogFlops(98*a->nz - a->m);
1069   return 0;
1070 }
1071 
1072 #undef __FUNC__
1073 #define __FUNC__ "MatMult_SeqBAIJ_N"
1074 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1075 {
1076   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1077   register Scalar *x,*z,*v,*xb;
1078   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1079   int             _One = 1,ncols,k;
1080   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1081 
1082   VecGetArray_Fast(xx,x);
1083   VecGetArray_Fast(zz,z);
1084 
1085   idx   = a->j;
1086   v     = a->a;
1087   ii    = a->i;
1088 
1089 
1090   if (!a->mult_work) {
1091     k = PetscMax(a->m,a->n);
1092     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1093   }
1094   work = a->mult_work;
1095   for ( i=0; i<mbs; i++ ) {
1096     n     = ii[1] - ii[0]; ii++;
1097     ncols = n*bs;
1098     workt = work;
1099     for ( j=0; j<n; j++ ) {
1100       xb = x + bs*(*idx++);
1101       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1102       workt += bs;
1103     }
1104     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1105     v += n*bs2;
1106     z += bs;
1107   }
1108   VecRestoreArray_Fast(xx,x);
1109   VecRestoreArray_Fast(zz,z);
1110   PLogFlops(2*a->nz*bs2 - a->m);
1111   return 0;
1112 }
1113 
1114 #undef __FUNC__
1115 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1116 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1117 {
1118   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1119   register Scalar *x,*y,*z,*v,sum;
1120   int             mbs=a->mbs,i,*idx,*ii,n;
1121 
1122   VecGetArray_Fast(xx,x);
1123   VecGetArray_Fast(yy,y);
1124   VecGetArray_Fast(zz,z);
1125 
1126   idx   = a->j;
1127   v     = a->a;
1128   ii    = a->i;
1129 
1130   for ( i=0; i<mbs; i++ ) {
1131     n    = ii[1] - ii[0]; ii++;
1132     sum  = y[i];
1133     while (n--) sum += *v++ * x[*idx++];
1134     z[i] = sum;
1135   }
1136   VecRestoreArray_Fast(xx,x);
1137   VecRestoreArray_Fast(yy,y);
1138   VecRestoreArray_Fast(zz,z);
1139   PLogFlops(2*a->nz);
1140   return 0;
1141 }
1142 
1143 #undef __FUNC__
1144 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1145 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1148   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1149   register Scalar x1,x2;
1150   int             mbs=a->mbs,i,*idx,*ii;
1151   int             j,n;
1152 
1153   VecGetArray_Fast(xx,x);
1154   VecGetArray_Fast(yy,y);
1155   VecGetArray_Fast(zz,z);
1156 
1157   idx   = a->j;
1158   v     = a->a;
1159   ii    = a->i;
1160 
1161   for ( i=0; i<mbs; i++ ) {
1162     n  = ii[1] - ii[0]; ii++;
1163     sum1 = y[0]; sum2 = y[1];
1164     for ( j=0; j<n; j++ ) {
1165       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1166       sum1 += v[0]*x1 + v[2]*x2;
1167       sum2 += v[1]*x1 + v[3]*x2;
1168       v += 4;
1169     }
1170     z[0] = sum1; z[1] = sum2;
1171     z += 2; y += 2;
1172   }
1173   VecRestoreArray_Fast(xx,x);
1174   VecRestoreArray_Fast(yy,y);
1175   VecRestoreArray_Fast(zz,z);
1176   PLogFlops(4*a->nz);
1177   return 0;
1178 }
1179 
1180 #undef __FUNC__
1181 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1182 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1183 {
1184   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1185   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1186   int             mbs=a->mbs,i,*idx,*ii,j,n;
1187 
1188   VecGetArray_Fast(xx,x);
1189   VecGetArray_Fast(yy,y);
1190   VecGetArray_Fast(zz,z);
1191 
1192   idx   = a->j;
1193   v     = a->a;
1194   ii    = a->i;
1195 
1196   for ( i=0; i<mbs; i++ ) {
1197     n  = ii[1] - ii[0]; ii++;
1198     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1199     for ( j=0; j<n; j++ ) {
1200       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1201       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1202       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1203       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1204       v += 9;
1205     }
1206     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1207     z += 3; y += 3;
1208   }
1209   VecRestoreArray_Fast(xx,x);
1210   VecRestoreArray_Fast(yy,y);
1211   VecRestoreArray_Fast(zz,z);
1212   PLogFlops(18*a->nz);
1213   return 0;
1214 }
1215 
1216 #undef __FUNC__
1217 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1218 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1221   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1222   register Scalar x1,x2,x3,x4;
1223   int             mbs=a->mbs,i,*idx,*ii;
1224   int             j,n;
1225 
1226   VecGetArray_Fast(xx,x);
1227   VecGetArray_Fast(yy,y);
1228   VecGetArray_Fast(zz,z);
1229 
1230   idx   = a->j;
1231   v     = a->a;
1232   ii    = a->i;
1233 
1234   for ( i=0; i<mbs; i++ ) {
1235     n  = ii[1] - ii[0]; ii++;
1236     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1237     for ( j=0; j<n; j++ ) {
1238       xb = x + 4*(*idx++);
1239       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1240       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1241       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1242       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1243       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1244       v += 16;
1245     }
1246     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1247     z += 4; y += 4;
1248   }
1249   VecRestoreArray_Fast(xx,x);
1250   VecRestoreArray_Fast(yy,y);
1251   VecRestoreArray_Fast(zz,z);
1252   PLogFlops(32*a->nz);
1253   return 0;
1254 }
1255 
1256 #undef __FUNC__
1257 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1258 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1259 {
1260   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1261   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1262   register Scalar x1,x2,x3,x4,x5;
1263   int             mbs=a->mbs,i,*idx,*ii,j,n;
1264 
1265   VecGetArray_Fast(xx,x);
1266   VecGetArray_Fast(yy,y);
1267   VecGetArray_Fast(zz,z);
1268 
1269   idx   = a->j;
1270   v     = a->a;
1271   ii    = a->i;
1272 
1273   for ( i=0; i<mbs; i++ ) {
1274     n  = ii[1] - ii[0]; ii++;
1275     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1276     for ( j=0; j<n; j++ ) {
1277       xb = x + 5*(*idx++);
1278       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1279       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1280       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1281       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1282       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1283       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1284       v += 25;
1285     }
1286     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1287     z += 5; y += 5;
1288   }
1289   VecRestoreArray_Fast(xx,x);
1290   VecRestoreArray_Fast(yy,y);
1291   VecRestoreArray_Fast(zz,z);
1292   PLogFlops(50*a->nz);
1293   return 0;
1294 }
1295 
1296 #undef __FUNC__
1297 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1298 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1299 {
1300   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1301   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1302   register Scalar x1,x2,x3,x4,x5,x6,x7;
1303   int             mbs=a->mbs,i,*idx,*ii,j,n;
1304 
1305   VecGetArray_Fast(xx,x);
1306   VecGetArray_Fast(yy,y);
1307   VecGetArray_Fast(zz,z);
1308 
1309   idx   = a->j;
1310   v     = a->a;
1311   ii    = a->i;
1312 
1313   for ( i=0; i<mbs; i++ ) {
1314     n  = ii[1] - ii[0]; ii++;
1315     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1316     for ( j=0; j<n; j++ ) {
1317       xb = x + 7*(*idx++);
1318       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1319       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1320       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1321       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1322       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1323       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1324       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1325       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1326       v += 49;
1327     }
1328     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1329     z += 7; y += 7;
1330   }
1331   VecRestoreArray_Fast(xx,x);
1332   VecRestoreArray_Fast(yy,y);
1333   VecRestoreArray_Fast(zz,z);
1334   PLogFlops(98*a->nz);
1335   return 0;
1336 }
1337 
1338 
1339 #undef __FUNC__
1340 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1341 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1342 {
1343   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1344   register Scalar *x,*z,*v,*xb;
1345   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1346   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1347 
1348   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1349 
1350   VecGetArray_Fast(xx,x);
1351   VecGetArray_Fast(zz,z);
1352 
1353   idx   = a->j;
1354   v     = a->a;
1355   ii    = a->i;
1356 
1357 
1358   if (!a->mult_work) {
1359     k = PetscMax(a->m,a->n);
1360     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1361   }
1362   work = a->mult_work;
1363   for ( i=0; i<mbs; i++ ) {
1364     n     = ii[1] - ii[0]; ii++;
1365     ncols = n*bs;
1366     workt = work;
1367     for ( j=0; j<n; j++ ) {
1368       xb = x + bs*(*idx++);
1369       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1370       workt += bs;
1371     }
1372     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1373     v += n*bs2;
1374     z += bs;
1375   }
1376   VecRestoreArray_Fast(xx,x);
1377   VecRestoreArray_Fast(zz,z);
1378   PLogFlops(2*a->nz*bs2 );
1379   return 0;
1380 }
1381 
1382 #undef __FUNC__
1383 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1384 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1385 {
1386   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1387   Scalar          *xg,*zg,*zb;
1388   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1389   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1390   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1391 
1392 
1393   VecGetArray_Fast(xx,xg); x = xg;
1394   VecGetArray_Fast(zz,zg); z = zg;
1395   PetscMemzero(z,N*sizeof(Scalar));
1396 
1397   idx   = a->j;
1398   v     = a->a;
1399   ii    = a->i;
1400 
1401   switch (bs) {
1402   case 1:
1403     for ( i=0; i<mbs; i++ ) {
1404       n  = ii[1] - ii[0]; ii++;
1405       xb = x + i; x1 = xb[0];
1406       ib = idx + ai[i];
1407       for ( j=0; j<n; j++ ) {
1408         rval    = ib[j];
1409         z[rval] += *v++ * x1;
1410       }
1411     }
1412     break;
1413   case 2:
1414     for ( i=0; i<mbs; i++ ) {
1415       n  = ii[1] - ii[0]; ii++;
1416       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1417       ib = idx + ai[i];
1418       for ( j=0; j<n; j++ ) {
1419         rval      = ib[j]*2;
1420         z[rval++] += v[0]*x1 + v[1]*x2;
1421         z[rval++] += v[2]*x1 + v[3]*x2;
1422         v += 4;
1423       }
1424     }
1425     break;
1426   case 3:
1427     for ( i=0; i<mbs; i++ ) {
1428       n  = ii[1] - ii[0]; ii++;
1429       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1430       ib = idx + ai[i];
1431       for ( j=0; j<n; j++ ) {
1432         rval      = ib[j]*3;
1433         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1434         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1435         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1436         v += 9;
1437       }
1438     }
1439     break;
1440   case 4:
1441     for ( i=0; i<mbs; i++ ) {
1442       n  = ii[1] - ii[0]; ii++;
1443       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1444       ib = idx + ai[i];
1445       for ( j=0; j<n; j++ ) {
1446         rval      = ib[j]*4;
1447         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1448         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1449         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1450         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1451         v += 16;
1452       }
1453     }
1454     break;
1455   case 5:
1456     for ( i=0; i<mbs; i++ ) {
1457       n  = ii[1] - ii[0]; ii++;
1458       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1459       x4 = xb[3];   x5 = xb[4];
1460       ib = idx + ai[i];
1461       for ( j=0; j<n; j++ ) {
1462         rval      = ib[j]*5;
1463         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1464         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1465         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1466         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1467         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1468         v += 25;
1469       }
1470     }
1471     break;
1472       /* block sizes larger then 5 by 5 are handled by BLAS */
1473     default: {
1474       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1475       if (!a->mult_work) {
1476         k = PetscMax(a->m,a->n);
1477         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1478         CHKPTRQ(a->mult_work);
1479       }
1480       work = a->mult_work;
1481       for ( i=0; i<mbs; i++ ) {
1482         n     = ii[1] - ii[0]; ii++;
1483         ncols = n*bs;
1484         PetscMemzero(work,ncols*sizeof(Scalar));
1485         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1486         v += n*bs2;
1487         x += bs;
1488         workt = work;
1489         for ( j=0; j<n; j++ ) {
1490           zb = z + bs*(*idx++);
1491           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1492           workt += bs;
1493         }
1494       }
1495     }
1496   }
1497   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1498   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1499   PLogFlops(2*a->nz*a->bs2 - a->n);
1500   return 0;
1501 }
1502 
1503 #undef __FUNC__
1504 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1505 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1506 {
1507   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1508   Scalar          *xg,*zg,*zb;
1509   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1510   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1511   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1512 
1513 
1514 
1515   VecGetArray_Fast(xx,xg); x = xg;
1516   VecGetArray_Fast(zz,zg); z = zg;
1517 
1518   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1519   else PetscMemzero(z,N*sizeof(Scalar));
1520 
1521 
1522   idx   = a->j;
1523   v     = a->a;
1524   ii    = a->i;
1525 
1526   switch (bs) {
1527   case 1:
1528     for ( i=0; i<mbs; i++ ) {
1529       n  = ii[1] - ii[0]; ii++;
1530       xb = x + i; x1 = xb[0];
1531       ib = idx + ai[i];
1532       for ( j=0; j<n; j++ ) {
1533         rval    = ib[j];
1534         z[rval] += *v++ * x1;
1535       }
1536     }
1537     break;
1538   case 2:
1539     for ( i=0; i<mbs; i++ ) {
1540       n  = ii[1] - ii[0]; ii++;
1541       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1542       ib = idx + ai[i];
1543       for ( j=0; j<n; j++ ) {
1544         rval      = ib[j]*2;
1545         z[rval++] += v[0]*x1 + v[1]*x2;
1546         z[rval++] += v[2]*x1 + v[3]*x2;
1547         v += 4;
1548       }
1549     }
1550     break;
1551   case 3:
1552     for ( i=0; i<mbs; i++ ) {
1553       n  = ii[1] - ii[0]; ii++;
1554       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1555       ib = idx + ai[i];
1556       for ( j=0; j<n; j++ ) {
1557         rval      = ib[j]*3;
1558         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1559         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1560         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1561         v += 9;
1562       }
1563     }
1564     break;
1565   case 4:
1566     for ( i=0; i<mbs; i++ ) {
1567       n  = ii[1] - ii[0]; ii++;
1568       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1569       ib = idx + ai[i];
1570       for ( j=0; j<n; j++ ) {
1571         rval      = ib[j]*4;
1572         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1573         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1574         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1575         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1576         v += 16;
1577       }
1578     }
1579     break;
1580   case 5:
1581     for ( i=0; i<mbs; i++ ) {
1582       n  = ii[1] - ii[0]; ii++;
1583       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1584       x4 = xb[3];   x5 = xb[4];
1585       ib = idx + ai[i];
1586       for ( j=0; j<n; j++ ) {
1587         rval      = ib[j]*5;
1588         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1589         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1590         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1591         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1592         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1593         v += 25;
1594       }
1595     }
1596     break;
1597       /* block sizes larger then 5 by 5 are handled by BLAS */
1598     default: {
1599       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1600       if (!a->mult_work) {
1601         k = PetscMax(a->m,a->n);
1602         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1603         CHKPTRQ(a->mult_work);
1604       }
1605       work = a->mult_work;
1606       for ( i=0; i<mbs; i++ ) {
1607         n     = ii[1] - ii[0]; ii++;
1608         ncols = n*bs;
1609         PetscMemzero(work,ncols*sizeof(Scalar));
1610         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1611         v += n*bs2;
1612         x += bs;
1613         workt = work;
1614         for ( j=0; j<n; j++ ) {
1615           zb = z + bs*(*idx++);
1616           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1617           workt += bs;
1618         }
1619       }
1620     }
1621   }
1622   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1623   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1624   PLogFlops(2*a->nz*a->bs2);
1625   return 0;
1626 }
1627 
1628 #undef __FUNC__
1629 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
1630 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1631 {
1632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1633 
1634   info->rows_global    = (double)a->m;
1635   info->columns_global = (double)a->n;
1636   info->rows_local     = (double)a->m;
1637   info->columns_local  = (double)a->n;
1638   info->block_size     = a->bs2;
1639   info->nz_allocated   = a->maxnz;
1640   info->nz_used        = a->bs2*a->nz;
1641   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1642   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1643     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1644   info->assemblies   = A->num_ass;
1645   info->mallocs      = a->reallocs;
1646   info->memory       = A->mem;
1647   if (A->factor) {
1648     info->fill_ratio_given  = A->info.fill_ratio_given;
1649     info->fill_ratio_needed = A->info.fill_ratio_needed;
1650     info->factor_mallocs    = A->info.factor_mallocs;
1651   } else {
1652     info->fill_ratio_given  = 0;
1653     info->fill_ratio_needed = 0;
1654     info->factor_mallocs    = 0;
1655   }
1656   return 0;
1657 }
1658 
1659 #undef __FUNC__
1660 #define __FUNC__ "MatEqual_SeqBAIJ"
1661 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1662 {
1663   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1664 
1665   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1666 
1667   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1668   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1669       (a->nz != b->nz)) {
1670     *flg = PETSC_FALSE; return 0;
1671   }
1672 
1673   /* if the a->i are the same */
1674   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1675     *flg = PETSC_FALSE; return 0;
1676   }
1677 
1678   /* if a->j are the same */
1679   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1680     *flg = PETSC_FALSE; return 0;
1681   }
1682 
1683   /* if a->a are the same */
1684   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1685     *flg = PETSC_FALSE; return 0;
1686   }
1687   *flg = PETSC_TRUE;
1688   return 0;
1689 
1690 }
1691 
1692 #undef __FUNC__
1693 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1694 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1695 {
1696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1697   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1698   Scalar      *x, zero = 0.0,*aa,*aa_j;
1699 
1700   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1701   bs   = a->bs;
1702   aa   = a->a;
1703   ai   = a->i;
1704   aj   = a->j;
1705   ambs = a->mbs;
1706   bs2  = a->bs2;
1707 
1708   VecSet(&zero,v);
1709   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1710   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1711   for ( i=0; i<ambs; i++ ) {
1712     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1713       if (aj[j] == i) {
1714         row  = i*bs;
1715         aa_j = aa+j*bs2;
1716         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1717         break;
1718       }
1719     }
1720   }
1721   return 0;
1722 }
1723 
1724 #undef __FUNC__
1725 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1726 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1727 {
1728   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1729   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1730   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1731 
1732   ai  = a->i;
1733   aj  = a->j;
1734   aa  = a->a;
1735   m   = a->m;
1736   n   = a->n;
1737   bs  = a->bs;
1738   mbs = a->mbs;
1739   bs2 = a->bs2;
1740   if (ll) {
1741     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1742     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1743     for ( i=0; i<mbs; i++ ) { /* for each block row */
1744       M  = ai[i+1] - ai[i];
1745       li = l + i*bs;
1746       v  = aa + bs2*ai[i];
1747       for ( j=0; j<M; j++ ) { /* for each block */
1748         for ( k=0; k<bs2; k++ ) {
1749           (*v++) *= li[k%bs];
1750         }
1751       }
1752     }
1753   }
1754 
1755   if (rr) {
1756     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1757     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1758     for ( i=0; i<mbs; i++ ) { /* for each block row */
1759       M  = ai[i+1] - ai[i];
1760       v  = aa + bs2*ai[i];
1761       for ( j=0; j<M; j++ ) { /* for each block */
1762         ri = r + bs*aj[ai[i]+j];
1763         for ( k=0; k<bs; k++ ) {
1764           x = ri[k];
1765           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1766         }
1767       }
1768     }
1769   }
1770   return 0;
1771 }
1772 
1773 
1774 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1775 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1776 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1777 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1778 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1779 
1780 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1781 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1782 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1783 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1784 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1785 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1786 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1787 
1788 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1789 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1790 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1791 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1792 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1793 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1794 
1795 #undef __FUNC__
1796 #define __FUNC__ "MatNorm_SeqBAIJ"
1797 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1798 {
1799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1800   Scalar      *v = a->a;
1801   double      sum = 0.0;
1802   int         i,nz=a->nz,bs2=a->bs2;
1803 
1804   if (type == NORM_FROBENIUS) {
1805     for (i=0; i< bs2*nz; i++ ) {
1806 #if defined(PETSC_COMPLEX)
1807       sum += real(conj(*v)*(*v)); v++;
1808 #else
1809       sum += (*v)*(*v); v++;
1810 #endif
1811     }
1812     *norm = sqrt(sum);
1813   }
1814   else {
1815     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1816   }
1817   return 0;
1818 }
1819 
1820 /*
1821      note: This can only work for identity for row and col. It would
1822    be good to check this and otherwise generate an error.
1823 */
1824 #undef __FUNC__
1825 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1826 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1827 {
1828   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1829   Mat         outA;
1830   int         ierr;
1831 
1832   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1833 
1834   outA          = inA;
1835   inA->factor   = FACTOR_LU;
1836   a->row        = row;
1837   a->col        = col;
1838 
1839   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1840 
1841   if (!a->diag) {
1842     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1843   }
1844   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1845   return 0;
1846 }
1847 
1848 #undef __FUNC__
1849 #define __FUNC__ "MatScale_SeqBAIJ"
1850 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1851 {
1852   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1853   int         one = 1, totalnz = a->bs2*a->nz;
1854   BLscal_( &totalnz, alpha, a->a, &one );
1855   PLogFlops(totalnz);
1856   return 0;
1857 }
1858 
1859 #undef __FUNC__
1860 #define __FUNC__ "MatGetValues_SeqBAIJ"
1861 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1862 {
1863   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1864   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1865   int        *ai = a->i, *ailen = a->ilen;
1866   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1867   Scalar     *ap, *aa = a->a, zero = 0.0;
1868 
1869   for ( k=0; k<m; k++ ) { /* loop over rows */
1870     row  = im[k]; brow = row/bs;
1871     if (row < 0) SETERRQ(1,0,"Negative row");
1872     if (row >= a->m) SETERRQ(1,0,"Row too large");
1873     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1874     nrow = ailen[brow];
1875     for ( l=0; l<n; l++ ) { /* loop over columns */
1876       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1877       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1878       col  = in[l] ;
1879       bcol = col/bs;
1880       cidx = col%bs;
1881       ridx = row%bs;
1882       high = nrow;
1883       low  = 0; /* assume unsorted */
1884       while (high-low > 5) {
1885         t = (low+high)/2;
1886         if (rp[t] > bcol) high = t;
1887         else             low  = t;
1888       }
1889       for ( i=low; i<high; i++ ) {
1890         if (rp[i] > bcol) break;
1891         if (rp[i] == bcol) {
1892           *v++ = ap[bs2*i+bs*cidx+ridx];
1893           goto finished;
1894         }
1895       }
1896       *v++ = zero;
1897       finished:;
1898     }
1899   }
1900   return 0;
1901 }
1902 
1903 #undef __FUNC__
1904 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
1905 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1906 {
1907   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1908   *bs = baij->bs;
1909   return 0;
1910 }
1911 
1912 /* idx should be of length atlease bs */
1913 #undef __FUNC__
1914 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1915 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1916 {
1917   int i,row;
1918   row = idx[0];
1919   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1920 
1921   for ( i=1; i<bs; i++ ) {
1922     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1923   }
1924   *flg = PETSC_TRUE;
1925   return 0;
1926 }
1927 
1928 #undef __FUNC__
1929 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1930 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1931 {
1932   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1933   IS          is_local;
1934   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1935   PetscTruth  flg;
1936   Scalar      zero = 0.0,*aa;
1937 
1938   /* Make a copy of the IS and  sort it */
1939   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1940   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1941   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1942   ierr = ISSort(is_local); CHKERRQ(ierr);
1943   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1944 
1945   i = 0;
1946   while (i < is_n) {
1947     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1948     flg = PETSC_FALSE;
1949     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1950     if (flg) { /* There exists a block of rows to be Zerowed */
1951       baij->ilen[rows[i]/bs] = 0;
1952       i += bs;
1953     } else { /* Zero out only the requested row */
1954       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1955       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1956       for ( j=0; j<count; j++ ) {
1957         aa[0] = zero;
1958         aa+=bs;
1959       }
1960       i++;
1961     }
1962   }
1963   if (diag) {
1964     for ( j=0; j<is_n; j++ ) {
1965       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1966     }
1967   }
1968   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1969   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1970   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1971   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1972 
1973   return 0;
1974 }
1975 
1976 #undef __FUNC__
1977 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
1978 int MatPrintHelp_SeqBAIJ(Mat A)
1979 {
1980   static int called = 0;
1981   MPI_Comm   comm = A->comm;
1982 
1983   if (called) return 0; else called = 1;
1984   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1985   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1986   return 0;
1987 }
1988 
1989 /* -------------------------------------------------------------------*/
1990 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1991        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1992        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1993        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1994        MatSolve_SeqBAIJ_N,0,
1995        0,0,
1996        MatLUFactor_SeqBAIJ,0,
1997        0,
1998        MatTranspose_SeqBAIJ,
1999        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2000        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2001        0,MatAssemblyEnd_SeqBAIJ,
2002        0,
2003        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2004        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2005        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2006        MatILUFactorSymbolic_SeqBAIJ,0,
2007        0,0,
2008        MatConvertSameType_SeqBAIJ,0,0,
2009        MatILUFactor_SeqBAIJ,0,0,
2010        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2011        MatGetValues_SeqBAIJ,0,
2012        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2013        0,0,0,MatGetBlockSize_SeqBAIJ,
2014        MatGetRowIJ_SeqBAIJ,
2015        MatRestoreRowIJ_SeqBAIJ,
2016        0,0,0,0,0,0,
2017        MatSetValuesBlocked_SeqBAIJ};
2018 
2019 #undef __FUNC__
2020 #define __FUNC__ "MatCreateSeqBAIJ"
2021 /*@C
2022    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2023    compressed row) format.  For good matrix assembly performance the
2024    user should preallocate the matrix storage by setting the parameter nz
2025    (or the array nzz).  By setting these parameters accurately, performance
2026    during matrix assembly can be increased by more than a factor of 50.
2027 
2028    Input Parameters:
2029 .  comm - MPI communicator, set to MPI_COMM_SELF
2030 .  bs - size of block
2031 .  m - number of rows
2032 .  n - number of columns
2033 .  nz - number of block nonzeros per block row (same for all rows)
2034 .  nzz - array containing the number of block nonzeros in the various block rows
2035          (possibly different for each block row) or PETSC_NULL
2036 
2037    Output Parameter:
2038 .  A - the matrix
2039 
2040    Options Database Keys:
2041 $    -mat_no_unroll - uses code that does not unroll the loops in the
2042 $                     block calculations (much solver)
2043 $    -mat_block_size - size of the blocks to use
2044 
2045    Notes:
2046    The block AIJ format is fully compatible with standard Fortran 77
2047    storage.  That is, the stored row and column indices can begin at
2048    either one (as in Fortran) or zero.  See the users' manual for details.
2049 
2050    Specify the preallocated storage with either nz or nnz (not both).
2051    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2052    allocation.  For additional details, see the users manual chapter on
2053    matrices.
2054 
2055 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2056 @*/
2057 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2058 {
2059   Mat         B;
2060   Mat_SeqBAIJ *b;
2061   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2062 
2063   MPI_Comm_size(comm,&size);
2064   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2065 
2066   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2067 
2068   if (mbs*bs!=m || nbs*bs!=n)
2069     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
2070 
2071   *A = 0;
2072   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
2073   PLogObjectCreate(B);
2074   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2075   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2076   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2077   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2078   if (!flg) {
2079     switch (bs) {
2080     case 1:
2081       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2082       B->ops.solve           = MatSolve_SeqBAIJ_1;
2083       B->ops.mult            = MatMult_SeqBAIJ_1;
2084       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2085       break;
2086     case 2:
2087       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2088       B->ops.solve           = MatSolve_SeqBAIJ_2;
2089       B->ops.mult            = MatMult_SeqBAIJ_2;
2090       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2091       break;
2092     case 3:
2093       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2094       B->ops.solve           = MatSolve_SeqBAIJ_3;
2095       B->ops.mult            = MatMult_SeqBAIJ_3;
2096       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2097       break;
2098     case 4:
2099       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2100       B->ops.solve           = MatSolve_SeqBAIJ_4;
2101       B->ops.mult            = MatMult_SeqBAIJ_4;
2102       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2103       break;
2104     case 5:
2105       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2106       B->ops.solve           = MatSolve_SeqBAIJ_5;
2107       B->ops.mult            = MatMult_SeqBAIJ_5;
2108       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2109       break;
2110     case 7:
2111       B->ops.mult            = MatMult_SeqBAIJ_7;
2112       B->ops.solve           = MatSolve_SeqBAIJ_7;
2113       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2114       break;
2115     }
2116   }
2117   B->destroy          = MatDestroy_SeqBAIJ;
2118   B->view             = MatView_SeqBAIJ;
2119   B->factor           = 0;
2120   B->lupivotthreshold = 1.0;
2121   B->mapping          = 0;
2122   b->row              = 0;
2123   b->col              = 0;
2124   b->reallocs         = 0;
2125 
2126   b->m       = m; B->m = m; B->M = m;
2127   b->n       = n; B->n = n; B->N = n;
2128   b->mbs     = mbs;
2129   b->nbs     = nbs;
2130   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2131   if (nnz == PETSC_NULL) {
2132     if (nz == PETSC_DEFAULT) nz = 5;
2133     else if (nz <= 0)        nz = 1;
2134     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2135     nz = nz*mbs;
2136   }
2137   else {
2138     nz = 0;
2139     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2140   }
2141 
2142   /* allocate the matrix space */
2143   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2144   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2145   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2146   b->j  = (int *) (b->a + nz*bs2);
2147   PetscMemzero(b->j,nz*sizeof(int));
2148   b->i  = b->j + nz;
2149   b->singlemalloc = 1;
2150 
2151   b->i[0] = 0;
2152   for (i=1; i<mbs+1; i++) {
2153     b->i[i] = b->i[i-1] + b->imax[i-1];
2154   }
2155 
2156   /* b->ilen will count nonzeros in each block row so far. */
2157   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2158   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2159   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2160 
2161   b->bs               = bs;
2162   b->bs2              = bs2;
2163   b->mbs              = mbs;
2164   b->nz               = 0;
2165   b->maxnz            = nz*bs2;
2166   b->sorted           = 0;
2167   b->roworiented      = 1;
2168   b->nonew            = 0;
2169   b->diag             = 0;
2170   b->solve_work       = 0;
2171   b->mult_work        = 0;
2172   b->spptr            = 0;
2173   B->info.nz_unneeded = (double)b->maxnz;
2174 
2175   *A = B;
2176   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2177   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2178   return 0;
2179 }
2180 
2181 #undef __FUNC__
2182 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2183 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2184 {
2185   Mat         C;
2186   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2187   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2188 
2189   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2190 
2191   *B = 0;
2192   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
2193   PLogObjectCreate(C);
2194   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2195   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2196   C->destroy    = MatDestroy_SeqBAIJ;
2197   C->view       = MatView_SeqBAIJ;
2198   C->factor     = A->factor;
2199   c->row        = 0;
2200   c->col        = 0;
2201   C->assembled  = PETSC_TRUE;
2202 
2203   c->m = C->m   = a->m;
2204   c->n = C->n   = a->n;
2205   C->M          = a->m;
2206   C->N          = a->n;
2207 
2208   c->bs         = a->bs;
2209   c->bs2        = a->bs2;
2210   c->mbs        = a->mbs;
2211   c->nbs        = a->nbs;
2212 
2213   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2214   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2215   for ( i=0; i<mbs; i++ ) {
2216     c->imax[i] = a->imax[i];
2217     c->ilen[i] = a->ilen[i];
2218   }
2219 
2220   /* allocate the matrix space */
2221   c->singlemalloc = 1;
2222   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2223   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2224   c->j  = (int *) (c->a + nz*bs2);
2225   c->i  = c->j + nz;
2226   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2227   if (mbs > 0) {
2228     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2229     if (cpvalues == COPY_VALUES) {
2230       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2231     }
2232   }
2233 
2234   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2235   c->sorted      = a->sorted;
2236   c->roworiented = a->roworiented;
2237   c->nonew       = a->nonew;
2238 
2239   if (a->diag) {
2240     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2241     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2242     for ( i=0; i<mbs; i++ ) {
2243       c->diag[i] = a->diag[i];
2244     }
2245   }
2246   else c->diag          = 0;
2247   c->nz                 = a->nz;
2248   c->maxnz              = a->maxnz;
2249   c->solve_work         = 0;
2250   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2251   c->mult_work          = 0;
2252   *B = C;
2253   return 0;
2254 }
2255 
2256 #undef __FUNC__
2257 #define __FUNC__ "MatLoad_SeqBAIJ"
2258 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2259 {
2260   Mat_SeqBAIJ  *a;
2261   Mat          B;
2262   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2263   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2264   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2265   int          *masked, nmask,tmp,bs2,ishift;
2266   Scalar       *aa;
2267   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2268 
2269   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2270   bs2  = bs*bs;
2271 
2272   MPI_Comm_size(comm,&size);
2273   if (size > 1) SETERRQ(1,0,"view must have one processor");
2274   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2275   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2276   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2277   M = header[1]; N = header[2]; nz = header[3];
2278 
2279   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2280 
2281   /*
2282      This code adds extra rows to make sure the number of rows is
2283     divisible by the blocksize
2284   */
2285   mbs        = M/bs;
2286   extra_rows = bs - M + bs*(mbs);
2287   if (extra_rows == bs) extra_rows = 0;
2288   else                  mbs++;
2289   if (extra_rows) {
2290     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2291   }
2292 
2293   /* read in row lengths */
2294   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2295   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2296   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2297 
2298   /* read in column indices */
2299   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2300   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2301   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2302 
2303   /* loop over row lengths determining block row lengths */
2304   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2305   PetscMemzero(browlengths,mbs*sizeof(int));
2306   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2307   PetscMemzero(mask,mbs*sizeof(int));
2308   masked = mask + mbs;
2309   rowcount = 0; nzcount = 0;
2310   for ( i=0; i<mbs; i++ ) {
2311     nmask = 0;
2312     for ( j=0; j<bs; j++ ) {
2313       kmax = rowlengths[rowcount];
2314       for ( k=0; k<kmax; k++ ) {
2315         tmp = jj[nzcount++]/bs;
2316         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2317       }
2318       rowcount++;
2319     }
2320     browlengths[i] += nmask;
2321     /* zero out the mask elements we set */
2322     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2323   }
2324 
2325   /* create our matrix */
2326   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2327          CHKERRQ(ierr);
2328   B = *A;
2329   a = (Mat_SeqBAIJ *) B->data;
2330 
2331   /* set matrix "i" values */
2332   a->i[0] = 0;
2333   for ( i=1; i<= mbs; i++ ) {
2334     a->i[i]      = a->i[i-1] + browlengths[i-1];
2335     a->ilen[i-1] = browlengths[i-1];
2336   }
2337   a->nz         = 0;
2338   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2339 
2340   /* read in nonzero values */
2341   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2342   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2343   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2344 
2345   /* set "a" and "j" values into matrix */
2346   nzcount = 0; jcount = 0;
2347   for ( i=0; i<mbs; i++ ) {
2348     nzcountb = nzcount;
2349     nmask    = 0;
2350     for ( j=0; j<bs; j++ ) {
2351       kmax = rowlengths[i*bs+j];
2352       for ( k=0; k<kmax; k++ ) {
2353         tmp = jj[nzcount++]/bs;
2354 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2355       }
2356       rowcount++;
2357     }
2358     /* sort the masked values */
2359     PetscSortInt(nmask,masked);
2360 
2361     /* set "j" values into matrix */
2362     maskcount = 1;
2363     for ( j=0; j<nmask; j++ ) {
2364       a->j[jcount++]  = masked[j];
2365       mask[masked[j]] = maskcount++;
2366     }
2367     /* set "a" values into matrix */
2368     ishift = bs2*a->i[i];
2369     for ( j=0; j<bs; j++ ) {
2370       kmax = rowlengths[i*bs+j];
2371       for ( k=0; k<kmax; k++ ) {
2372         tmp    = jj[nzcountb]/bs ;
2373         block  = mask[tmp] - 1;
2374         point  = jj[nzcountb] - bs*tmp;
2375         idx    = ishift + bs2*block + j + bs*point;
2376         a->a[idx] = aa[nzcountb++];
2377       }
2378     }
2379     /* zero out the mask elements we set */
2380     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2381   }
2382   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2383 
2384   PetscFree(rowlengths);
2385   PetscFree(browlengths);
2386   PetscFree(aa);
2387   PetscFree(jj);
2388   PetscFree(mask);
2389 
2390   B->assembled = PETSC_TRUE;
2391 
2392   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2393   if (flg) {
2394     Viewer tviewer;
2395     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2396     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2397     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2398     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2399   }
2400   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2401   if (flg) {
2402     Viewer tviewer;
2403     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2404     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2405     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2406     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2407   }
2408   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2409   if (flg) {
2410     Viewer tviewer;
2411     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2412     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2413     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2414   }
2415   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2416   if (flg) {
2417     Viewer tviewer;
2418     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2419     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2420     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2421     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2422   }
2423   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2424   if (flg) {
2425     Viewer tviewer;
2426     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2427     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2428     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2429     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2430   }
2431   return 0;
2432 }
2433 
2434 
2435 
2436