xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 92c4ed94046817a2881466ca8f206a5effe0c57b)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.86 1997/01/29 19:45:09 balay Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 #undef __FUNC__
20 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21 int MatMarkDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         i,j, *diag, m = a->mbs;
25 
26   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27   PLogObjectMemory(A,(m+1)*sizeof(int));
28   for ( i=0; i<m; i++ ) {
29     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30       if (a->j[j] == i) {
31         diag[i] = j;
32         break;
33       }
34     }
35   }
36   a->diag = diag;
37   return 0;
38 }
39 
40 #include "draw.h"
41 #include "pinclude/pviewer.h"
42 #include "sys.h"
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary"
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw"
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ"
346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert;
418         }
419       }
420       if (nonew) goto noinsert;
421       if (nrow >= rmax) {
422         /* there is no extra room in row, therefore enlarge */
423         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424         Scalar *new_a;
425 
426         /* malloc new storage space */
427         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
429         new_j   = (int *) (new_a + bs2*new_nz);
430         new_i   = new_j + new_nz;
431 
432         /* copy over old data into new slots */
433         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
434         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438                                                            len*sizeof(int));
439         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443         /* free up old matrix storage */
444         PetscFree(a->a);
445         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447         a->singlemalloc = 1;
448 
449         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
451         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
452         a->maxnz += bs2*CHUNKSIZE;
453         a->reallocs++;
454         a->nz++;
455       }
456       N = nrow++ - 1;
457       /* shift up all the later entries in this row */
458       for ( ii=N; ii>=i; ii-- ) {
459         rp[ii+1] = rp[ii];
460         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461       }
462       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463       rp[i]                      = bcol;
464       ap[bs2*i + bs*cidx + ridx] = value;
465       noinsert:;
466       low = i;
467     }
468     ailen[brow] = nrow;
469   }
470   return 0;
471 }
472 
473 #undef __FUNC__
474 #define __FUNC__ "MatSetValues_SeqBAIJ"
475 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
476 {
477   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
479   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
480   int         *aj=a->j,nonew=a->nonew,bs=a->bs;
481   int          ridx,cidx,bs2=a->bs2;
482   Scalar      *ap,value,*aa=a->a,*bap;
483 
484   for ( k=0; k<m; k++ ) { /* loop over added rows */
485     row  = im[k];
486 #if defined(PETSC_BOPT_g)
487     if (row < 0) SETERRQ(1,0,"Negative row");
488     if (row >= a->m) SETERRQ(1,0,"Row too large");
489 #endif
490     rp   = aj + ai[row];
491     ap   = aa + bs2*ai[row];
492     rmax = imax[row];
493     nrow = ailen[row];
494     low  = 0;
495     for ( l=0; l<n; l++ ) { /* loop over added columns */
496 #if defined(PETSC_BOPT_g)
497       if (in[l] < 0) SETERRQ(1,0,"Negative column");
498       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
499 #endif
500       col = in[l];
501       ridx = row % bs; cidx = col % bs;
502       if (roworiented) {
503         value = *v++;
504       }
505       else {
506         value = v[k + l*m];
507       }
508       if (!sorted) low = 0; high = nrow;
509       while (high-low > 7) {
510         t = (low+high)/2;
511         if (rp[t] > col) high = t;
512         else             low  = t;
513       }
514       for ( i=low; i<high; i++ ) {
515         if (rp[i] > col) break;
516         if (rp[i] == col) {
517           bap  = ap +  bs2*i + bs*cidx + ridx;
518           if (is == ADD_VALUES) *bap += value;
519           else                  *bap  = value;
520           goto noinsert;
521         }
522       }
523       if (nonew) goto noinsert;
524       if (nrow >= rmax) {
525         /* there is no extra room in row, therefore enlarge */
526         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
527         Scalar *new_a;
528 
529         /* malloc new storage space */
530         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
531         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
532         new_j   = (int *) (new_a + bs2*new_nz);
533         new_i   = new_j + new_nz;
534 
535         /* copy over old data into new slots */
536         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
537         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
538         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
539         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
540         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
541                                                            len*sizeof(int));
542         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
543         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
544         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
545                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
546         /* free up old matrix storage */
547         PetscFree(a->a);
548         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
549         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
550         a->singlemalloc = 1;
551 
552         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
553         rmax = imax[row] = imax[row] + CHUNKSIZE;
554         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
555         a->maxnz += bs2*CHUNKSIZE;
556         a->reallocs++;
557         a->nz++;
558       }
559       N = nrow++ - 1;
560       /* shift up all the later entries in this row */
561       for ( ii=N; ii>=i; ii-- ) {
562         rp[ii+1] = rp[ii];
563         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
564       }
565       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
566       rp[i]                      = col;
567       ap[bs2*i + bs*cidx + ridx] = value;
568       noinsert:;
569       low = i;
570     }
571     ailen[row] = nrow;
572   }
573   return 0;
574 }
575 
576 #undef __FUNC__
577 #define __FUNC__ "MatGetSize_SeqBAIJ"
578 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
579 {
580   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
581   *m = a->m; *n = a->n;
582   return 0;
583 }
584 
585 #undef __FUNC__
586 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
587 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
588 {
589   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
590   *m = 0; *n = a->m;
591   return 0;
592 }
593 
594 #undef __FUNC__
595 #define __FUNC__ "MatGetRow_SeqBAIJ"
596 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
597 {
598   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
599   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
600   Scalar      *aa,*v_i,*aa_i;
601 
602   bs  = a->bs;
603   ai  = a->i;
604   aj  = a->j;
605   aa  = a->a;
606   bs2 = a->bs2;
607 
608   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
609 
610   bn  = row/bs;   /* Block number */
611   bp  = row % bs; /* Block Position */
612   M   = ai[bn+1] - ai[bn];
613   *nz = bs*M;
614 
615   if (v) {
616     *v = 0;
617     if (*nz) {
618       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
619       for ( i=0; i<M; i++ ) { /* for each block in the block row */
620         v_i  = *v + i*bs;
621         aa_i = aa + bs2*(ai[bn] + i);
622         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
623       }
624     }
625   }
626 
627   if (idx) {
628     *idx = 0;
629     if (*nz) {
630       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
631       for ( i=0; i<M; i++ ) { /* for each block in the block row */
632         idx_i = *idx + i*bs;
633         itmp  = bs*aj[ai[bn] + i];
634         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
635       }
636     }
637   }
638   return 0;
639 }
640 
641 #undef __FUNC__
642 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
643 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
644 {
645   if (idx) {if (*idx) PetscFree(*idx);}
646   if (v)   {if (*v)   PetscFree(*v);}
647   return 0;
648 }
649 
650 #undef __FUNC__
651 #define __FUNC__ "MatTranspose_SeqBAIJ"
652 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
653 {
654   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
655   Mat         C;
656   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
657   int         *rows,*cols,bs2=a->bs2;
658   Scalar      *array=a->a;
659 
660   if (B==PETSC_NULL && mbs!=nbs)
661     SETERRQ(1,0,"Square matrix only for in-place");
662   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
663   PetscMemzero(col,(1+nbs)*sizeof(int));
664 
665   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
666   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
667   PetscFree(col);
668   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
669   cols = rows + bs;
670   for ( i=0; i<mbs; i++ ) {
671     cols[0] = i*bs;
672     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
673     len = ai[i+1] - ai[i];
674     for ( j=0; j<len; j++ ) {
675       rows[0] = (*aj++)*bs;
676       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
677       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
678       array += bs2;
679     }
680   }
681   PetscFree(rows);
682 
683   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
684   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
685 
686   if (B != PETSC_NULL) {
687     *B = C;
688   } else {
689     /* This isn't really an in-place transpose */
690     PetscFree(a->a);
691     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
692     if (a->diag) PetscFree(a->diag);
693     if (a->ilen) PetscFree(a->ilen);
694     if (a->imax) PetscFree(a->imax);
695     if (a->solve_work) PetscFree(a->solve_work);
696     PetscFree(a);
697     PetscMemcpy(A,C,sizeof(struct _Mat));
698     PetscHeaderDestroy(C);
699   }
700   return 0;
701 }
702 
703 
704 #undef __FUNC__
705 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
706 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
707 {
708   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
709   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
710   int        m = a->m,*ip, N, *ailen = a->ilen;
711   int        mbs = a->mbs, bs2 = a->bs2,rmax;
712   Scalar     *aa = a->a, *ap;
713 
714   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
715 
716   rmax = ailen[0];
717   for ( i=1; i<mbs; i++ ) {
718     /* move each row back by the amount of empty slots (fshift) before it*/
719     fshift += imax[i-1] - ailen[i-1];
720     rmax   = PetscMax(rmax,ailen[i]);
721     if (fshift) {
722       ip = aj + ai[i]; ap = aa + bs2*ai[i];
723       N = ailen[i];
724       for ( j=0; j<N; j++ ) {
725         ip[j-fshift] = ip[j];
726         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
727       }
728     }
729     ai[i] = ai[i-1] + ailen[i-1];
730   }
731   if (mbs) {
732     fshift += imax[mbs-1] - ailen[mbs-1];
733     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
734   }
735   /* reset ilen and imax for each row */
736   for ( i=0; i<mbs; i++ ) {
737     ailen[i] = imax[i] = ai[i+1] - ai[i];
738   }
739   a->nz = ai[mbs];
740 
741   /* diagonals may have moved, so kill the diagonal pointers */
742   if (fshift && a->diag) {
743     PetscFree(a->diag);
744     PLogObjectMemory(A,-(m+1)*sizeof(int));
745     a->diag = 0;
746   }
747   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
748            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
749   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
750            a->reallocs);
751   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
752   a->reallocs          = 0;
753   A->info.nz_unneeded  = (double)fshift*bs2;
754 
755   return 0;
756 }
757 
758 #undef __FUNC__
759 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
760 static int MatZeroEntries_SeqBAIJ(Mat A)
761 {
762   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
763   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
764   return 0;
765 }
766 
767 #undef __FUNC__
768 #define __FUNC__ "MatDestroy_SeqBAIJ"
769 int MatDestroy_SeqBAIJ(PetscObject obj)
770 {
771   Mat         A  = (Mat) obj;
772   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773   int         ierr;
774 
775 #if defined(PETSC_LOG)
776   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
777 #endif
778   PetscFree(a->a);
779   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
780   if (a->diag) PetscFree(a->diag);
781   if (a->ilen) PetscFree(a->ilen);
782   if (a->imax) PetscFree(a->imax);
783   if (a->solve_work) PetscFree(a->solve_work);
784   if (a->mult_work) PetscFree(a->mult_work);
785   PetscFree(a);
786   if (A->mapping) {
787     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
788   }
789   PLogObjectDestroy(A);
790   PetscHeaderDestroy(A);
791   return 0;
792 }
793 
794 #undef __FUNC__
795 #define __FUNC__ "MatSetOption_SeqBAIJ"
796 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
797 {
798   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
799   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
800   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
801   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
802   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
803   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
804   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
805   else if (op == MAT_ROWS_SORTED ||
806            op == MAT_ROWS_UNSORTED ||
807            op == MAT_SYMMETRIC ||
808            op == MAT_STRUCTURALLY_SYMMETRIC ||
809            op == MAT_YES_NEW_DIAGONALS ||
810            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
811     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
812   else if (op == MAT_NO_NEW_DIAGONALS)
813     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
814   else
815     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
816   return 0;
817 }
818 
819 
820 /* -------------------------------------------------------*/
821 /* Should check that shapes of vectors and matrices match */
822 /* -------------------------------------------------------*/
823 #include "pinclude/plapack.h"
824 
825 #undef __FUNC__
826 #define __FUNC__ "MatMult_SeqBAIJ_1"
827 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
828 {
829   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
830   register Scalar *x,*z,*v,sum;
831   int             mbs=a->mbs,i,*idx,*ii,n;
832 
833   VecGetArray_Fast(xx,x);
834   VecGetArray_Fast(zz,z);
835 
836   idx   = a->j;
837   v     = a->a;
838   ii    = a->i;
839 
840   for ( i=0; i<mbs; i++ ) {
841     n    = ii[1] - ii[0]; ii++;
842     sum  = 0.0;
843     while (n--) sum += *v++ * x[*idx++];
844     z[i] = sum;
845   }
846   VecRestoreArray_Fast(xx,x);
847   VecRestoreArray_Fast(zz,z);
848   PLogFlops(2*a->nz - a->m);
849   return 0;
850 }
851 
852 #undef __FUNC__
853 #define __FUNC__ "MatMult_SeqBAIJ_2"
854 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
855 {
856   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
857   register Scalar *x,*z,*v,*xb,sum1,sum2;
858   register Scalar x1,x2;
859   int             mbs=a->mbs,i,*idx,*ii;
860   int             j,n;
861 
862   VecGetArray_Fast(xx,x);
863   VecGetArray_Fast(zz,z);
864 
865   idx   = a->j;
866   v     = a->a;
867   ii    = a->i;
868 
869   for ( i=0; i<mbs; i++ ) {
870     n  = ii[1] - ii[0]; ii++;
871     sum1 = 0.0; sum2 = 0.0;
872     for ( j=0; j<n; j++ ) {
873       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
874       sum1 += v[0]*x1 + v[2]*x2;
875       sum2 += v[1]*x1 + v[3]*x2;
876       v += 4;
877     }
878     z[0] = sum1; z[1] = sum2;
879     z += 2;
880   }
881   VecRestoreArray_Fast(xx,x);
882   VecRestoreArray_Fast(zz,z);
883   PLogFlops(4*a->nz - a->m);
884   return 0;
885 }
886 
887 #undef __FUNC__
888 #define __FUNC__ "MatMult_SeqBAIJ_3"
889 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
890 {
891   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
892   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
893   int             mbs=a->mbs,i,*idx,*ii,j,n;
894 
895   VecGetArray_Fast(xx,x);
896   VecGetArray_Fast(zz,z);
897 
898   idx   = a->j;
899   v     = a->a;
900   ii    = a->i;
901 
902   for ( i=0; i<mbs; i++ ) {
903     n  = ii[1] - ii[0]; ii++;
904     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
905     for ( j=0; j<n; j++ ) {
906       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
907       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
908       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
909       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
910       v += 9;
911     }
912     z[0] = sum1; z[1] = sum2; z[2] = sum3;
913     z += 3;
914   }
915   VecRestoreArray_Fast(xx,x);
916   VecRestoreArray_Fast(zz,z);
917   PLogFlops(18*a->nz - a->m);
918   return 0;
919 }
920 
921 #undef __FUNC__
922 #define __FUNC__ "MatMult_SeqBAIJ_4"
923 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
924 {
925   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
926   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
927   register Scalar x1,x2,x3,x4;
928   int             mbs=a->mbs,i,*idx,*ii;
929   int             j,n;
930 
931   VecGetArray_Fast(xx,x);
932   VecGetArray_Fast(zz,z);
933 
934   idx   = a->j;
935   v     = a->a;
936   ii    = a->i;
937 
938   for ( i=0; i<mbs; i++ ) {
939     n  = ii[1] - ii[0]; ii++;
940     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
941     for ( j=0; j<n; j++ ) {
942       xb = x + 4*(*idx++);
943       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
944       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
945       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
946       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
947       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
948       v += 16;
949     }
950     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
951     z += 4;
952   }
953   VecRestoreArray_Fast(xx,x);
954   VecRestoreArray_Fast(zz,z);
955   PLogFlops(32*a->nz - a->m);
956   return 0;
957 }
958 
959 #undef __FUNC__
960 #define __FUNC__ "MatMult_SeqBAIJ_5"
961 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
962 {
963   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
964   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
965   register Scalar x1,x2,x3,x4,x5;
966   int             mbs=a->mbs,i,*idx,*ii,j,n;
967 
968   VecGetArray_Fast(xx,x);
969   VecGetArray_Fast(zz,z);
970 
971   idx   = a->j;
972   v     = a->a;
973   ii    = a->i;
974 
975   for ( i=0; i<mbs; i++ ) {
976     n  = ii[1] - ii[0]; ii++;
977     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
978     for ( j=0; j<n; j++ ) {
979       xb = x + 5*(*idx++);
980       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
981       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
982       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
983       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
984       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
985       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
986       v += 25;
987     }
988     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
989     z += 5;
990   }
991   VecRestoreArray_Fast(xx,x);
992   VecRestoreArray_Fast(zz,z);
993   PLogFlops(50*a->nz - a->m);
994   return 0;
995 }
996 
997 #undef __FUNC__
998 #define __FUNC__ "MatMult_SeqBAIJ_N"
999 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1000 {
1001   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1002   register Scalar *x,*z,*v,*xb;
1003   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1004   int             _One = 1,ncols,k;
1005   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1006 
1007   VecGetArray_Fast(xx,x);
1008   VecGetArray_Fast(zz,z);
1009 
1010   idx   = a->j;
1011   v     = a->a;
1012   ii    = a->i;
1013 
1014 
1015   if (!a->mult_work) {
1016     k = PetscMax(a->m,a->n);
1017     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1018   }
1019   work = a->mult_work;
1020   for ( i=0; i<mbs; i++ ) {
1021     n     = ii[1] - ii[0]; ii++;
1022     ncols = n*bs;
1023     workt = work;
1024     for ( j=0; j<n; j++ ) {
1025       xb = x + bs*(*idx++);
1026       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1027       workt += bs;
1028     }
1029     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1030     v += n*bs2;
1031     z += bs;
1032   }
1033   VecRestoreArray_Fast(xx,x);
1034   VecRestoreArray_Fast(zz,z);
1035   PLogFlops(2*a->nz*bs2 - a->m);
1036   return 0;
1037 }
1038 
1039 #undef __FUNC__
1040 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1041 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1044   register Scalar *x,*y,*z,*v,sum;
1045   int             mbs=a->mbs,i,*idx,*ii,n;
1046 
1047   VecGetArray_Fast(xx,x);
1048   VecGetArray_Fast(yy,y);
1049   VecGetArray_Fast(zz,z);
1050 
1051   idx   = a->j;
1052   v     = a->a;
1053   ii    = a->i;
1054 
1055   for ( i=0; i<mbs; i++ ) {
1056     n    = ii[1] - ii[0]; ii++;
1057     sum  = y[i];
1058     while (n--) sum += *v++ * x[*idx++];
1059     z[i] = sum;
1060   }
1061   VecRestoreArray_Fast(xx,x);
1062   VecRestoreArray_Fast(yy,y);
1063   VecRestoreArray_Fast(zz,z);
1064   PLogFlops(2*a->nz);
1065   return 0;
1066 }
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1070 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1071 {
1072   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1073   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1074   register Scalar x1,x2;
1075   int             mbs=a->mbs,i,*idx,*ii;
1076   int             j,n;
1077 
1078   VecGetArray_Fast(xx,x);
1079   VecGetArray_Fast(yy,y);
1080   VecGetArray_Fast(zz,z);
1081 
1082   idx   = a->j;
1083   v     = a->a;
1084   ii    = a->i;
1085 
1086   for ( i=0; i<mbs; i++ ) {
1087     n  = ii[1] - ii[0]; ii++;
1088     sum1 = y[0]; sum2 = y[1];
1089     for ( j=0; j<n; j++ ) {
1090       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1091       sum1 += v[0]*x1 + v[2]*x2;
1092       sum2 += v[1]*x1 + v[3]*x2;
1093       v += 4;
1094     }
1095     z[0] = sum1; z[1] = sum2;
1096     z += 2; y += 2;
1097   }
1098   VecRestoreArray_Fast(xx,x);
1099   VecRestoreArray_Fast(yy,y);
1100   VecRestoreArray_Fast(zz,z);
1101   PLogFlops(4*a->nz);
1102   return 0;
1103 }
1104 
1105 #undef __FUNC__
1106 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1107 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1110   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1111   int             mbs=a->mbs,i,*idx,*ii,j,n;
1112 
1113   VecGetArray_Fast(xx,x);
1114   VecGetArray_Fast(yy,y);
1115   VecGetArray_Fast(zz,z);
1116 
1117   idx   = a->j;
1118   v     = a->a;
1119   ii    = a->i;
1120 
1121   for ( i=0; i<mbs; i++ ) {
1122     n  = ii[1] - ii[0]; ii++;
1123     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1124     for ( j=0; j<n; j++ ) {
1125       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1126       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1127       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1128       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1129       v += 9;
1130     }
1131     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1132     z += 3; y += 3;
1133   }
1134   VecRestoreArray_Fast(xx,x);
1135   VecRestoreArray_Fast(yy,y);
1136   VecRestoreArray_Fast(zz,z);
1137   PLogFlops(18*a->nz);
1138   return 0;
1139 }
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1143 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1144 {
1145   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1146   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1147   register Scalar x1,x2,x3,x4;
1148   int             mbs=a->mbs,i,*idx,*ii;
1149   int             j,n;
1150 
1151   VecGetArray_Fast(xx,x);
1152   VecGetArray_Fast(yy,y);
1153   VecGetArray_Fast(zz,z);
1154 
1155   idx   = a->j;
1156   v     = a->a;
1157   ii    = a->i;
1158 
1159   for ( i=0; i<mbs; i++ ) {
1160     n  = ii[1] - ii[0]; ii++;
1161     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1162     for ( j=0; j<n; j++ ) {
1163       xb = x + 4*(*idx++);
1164       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1165       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1166       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1167       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1168       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1169       v += 16;
1170     }
1171     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1172     z += 4; y += 4;
1173   }
1174   VecRestoreArray_Fast(xx,x);
1175   VecRestoreArray_Fast(yy,y);
1176   VecRestoreArray_Fast(zz,z);
1177   PLogFlops(32*a->nz);
1178   return 0;
1179 }
1180 
1181 #undef __FUNC__
1182 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1183 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1184 {
1185   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1186   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1187   register Scalar x1,x2,x3,x4,x5;
1188   int             mbs=a->mbs,i,*idx,*ii,j,n;
1189 
1190   VecGetArray_Fast(xx,x);
1191   VecGetArray_Fast(yy,y);
1192   VecGetArray_Fast(zz,z);
1193 
1194   idx   = a->j;
1195   v     = a->a;
1196   ii    = a->i;
1197 
1198   for ( i=0; i<mbs; i++ ) {
1199     n  = ii[1] - ii[0]; ii++;
1200     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1201     for ( j=0; j<n; j++ ) {
1202       xb = x + 5*(*idx++);
1203       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1204       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1205       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1206       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1207       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1208       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1209       v += 25;
1210     }
1211     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1212     z += 5; y += 5;
1213   }
1214   VecRestoreArray_Fast(xx,x);
1215   VecRestoreArray_Fast(yy,y);
1216   VecRestoreArray_Fast(zz,z);
1217   PLogFlops(50*a->nz);
1218   return 0;
1219 }
1220 
1221 #undef __FUNC__
1222 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1223 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1226   register Scalar *x,*z,*v,*xb;
1227   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1228   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1229 
1230   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1231 
1232   VecGetArray_Fast(xx,x);
1233   VecGetArray_Fast(zz,z);
1234 
1235   idx   = a->j;
1236   v     = a->a;
1237   ii    = a->i;
1238 
1239 
1240   if (!a->mult_work) {
1241     k = PetscMax(a->m,a->n);
1242     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1243   }
1244   work = a->mult_work;
1245   for ( i=0; i<mbs; i++ ) {
1246     n     = ii[1] - ii[0]; ii++;
1247     ncols = n*bs;
1248     workt = work;
1249     for ( j=0; j<n; j++ ) {
1250       xb = x + bs*(*idx++);
1251       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1252       workt += bs;
1253     }
1254     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1255     v += n*bs2;
1256     z += bs;
1257   }
1258   VecRestoreArray_Fast(xx,x);
1259   VecRestoreArray_Fast(zz,z);
1260   PLogFlops(2*a->nz*bs2 );
1261   return 0;
1262 }
1263 
1264 #undef __FUNC__
1265 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1266 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1267 {
1268   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1269   Scalar          *xg,*zg,*zb;
1270   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1271   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1272   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1273 
1274 
1275   VecGetArray_Fast(xx,xg); x = xg;
1276   VecGetArray_Fast(zz,zg); z = zg;
1277   PetscMemzero(z,N*sizeof(Scalar));
1278 
1279   idx   = a->j;
1280   v     = a->a;
1281   ii    = a->i;
1282 
1283   switch (bs) {
1284   case 1:
1285     for ( i=0; i<mbs; i++ ) {
1286       n  = ii[1] - ii[0]; ii++;
1287       xb = x + i; x1 = xb[0];
1288       ib = idx + ai[i];
1289       for ( j=0; j<n; j++ ) {
1290         rval    = ib[j];
1291         z[rval] += *v++ * x1;
1292       }
1293     }
1294     break;
1295   case 2:
1296     for ( i=0; i<mbs; i++ ) {
1297       n  = ii[1] - ii[0]; ii++;
1298       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1299       ib = idx + ai[i];
1300       for ( j=0; j<n; j++ ) {
1301         rval      = ib[j]*2;
1302         z[rval++] += v[0]*x1 + v[1]*x2;
1303         z[rval++] += v[2]*x1 + v[3]*x2;
1304         v += 4;
1305       }
1306     }
1307     break;
1308   case 3:
1309     for ( i=0; i<mbs; i++ ) {
1310       n  = ii[1] - ii[0]; ii++;
1311       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1312       ib = idx + ai[i];
1313       for ( j=0; j<n; j++ ) {
1314         rval      = ib[j]*3;
1315         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1316         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1317         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1318         v += 9;
1319       }
1320     }
1321     break;
1322   case 4:
1323     for ( i=0; i<mbs; i++ ) {
1324       n  = ii[1] - ii[0]; ii++;
1325       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1326       ib = idx + ai[i];
1327       for ( j=0; j<n; j++ ) {
1328         rval      = ib[j]*4;
1329         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1330         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1331         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1332         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1333         v += 16;
1334       }
1335     }
1336     break;
1337   case 5:
1338     for ( i=0; i<mbs; i++ ) {
1339       n  = ii[1] - ii[0]; ii++;
1340       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1341       x4 = xb[3];   x5 = xb[4];
1342       ib = idx + ai[i];
1343       for ( j=0; j<n; j++ ) {
1344         rval      = ib[j]*5;
1345         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1346         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1347         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1348         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1349         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1350         v += 25;
1351       }
1352     }
1353     break;
1354       /* block sizes larger then 5 by 5 are handled by BLAS */
1355     default: {
1356       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1357       if (!a->mult_work) {
1358         k = PetscMax(a->m,a->n);
1359         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1360         CHKPTRQ(a->mult_work);
1361       }
1362       work = a->mult_work;
1363       for ( i=0; i<mbs; i++ ) {
1364         n     = ii[1] - ii[0]; ii++;
1365         ncols = n*bs;
1366         PetscMemzero(work,ncols*sizeof(Scalar));
1367         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1368         v += n*bs2;
1369         x += bs;
1370         workt = work;
1371         for ( j=0; j<n; j++ ) {
1372           zb = z + bs*(*idx++);
1373           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1374           workt += bs;
1375         }
1376       }
1377     }
1378   }
1379   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1380   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1381   PLogFlops(2*a->nz*a->bs2 - a->n);
1382   return 0;
1383 }
1384 
1385 #undef __FUNC__
1386 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1387 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1388 {
1389   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1390   Scalar          *xg,*zg,*zb;
1391   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1392   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1393   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1394 
1395 
1396 
1397   VecGetArray_Fast(xx,xg); x = xg;
1398   VecGetArray_Fast(zz,zg); z = zg;
1399 
1400   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1401   else PetscMemzero(z,N*sizeof(Scalar));
1402 
1403 
1404   idx   = a->j;
1405   v     = a->a;
1406   ii    = a->i;
1407 
1408   switch (bs) {
1409   case 1:
1410     for ( i=0; i<mbs; i++ ) {
1411       n  = ii[1] - ii[0]; ii++;
1412       xb = x + i; x1 = xb[0];
1413       ib = idx + ai[i];
1414       for ( j=0; j<n; j++ ) {
1415         rval    = ib[j];
1416         z[rval] += *v++ * x1;
1417       }
1418     }
1419     break;
1420   case 2:
1421     for ( i=0; i<mbs; i++ ) {
1422       n  = ii[1] - ii[0]; ii++;
1423       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1424       ib = idx + ai[i];
1425       for ( j=0; j<n; j++ ) {
1426         rval      = ib[j]*2;
1427         z[rval++] += v[0]*x1 + v[1]*x2;
1428         z[rval++] += v[2]*x1 + v[3]*x2;
1429         v += 4;
1430       }
1431     }
1432     break;
1433   case 3:
1434     for ( i=0; i<mbs; i++ ) {
1435       n  = ii[1] - ii[0]; ii++;
1436       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1437       ib = idx + ai[i];
1438       for ( j=0; j<n; j++ ) {
1439         rval      = ib[j]*3;
1440         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1441         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1442         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1443         v += 9;
1444       }
1445     }
1446     break;
1447   case 4:
1448     for ( i=0; i<mbs; i++ ) {
1449       n  = ii[1] - ii[0]; ii++;
1450       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1451       ib = idx + ai[i];
1452       for ( j=0; j<n; j++ ) {
1453         rval      = ib[j]*4;
1454         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1455         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1456         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1457         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1458         v += 16;
1459       }
1460     }
1461     break;
1462   case 5:
1463     for ( i=0; i<mbs; i++ ) {
1464       n  = ii[1] - ii[0]; ii++;
1465       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1466       x4 = xb[3];   x5 = xb[4];
1467       ib = idx + ai[i];
1468       for ( j=0; j<n; j++ ) {
1469         rval      = ib[j]*5;
1470         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1471         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1472         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1473         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1474         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1475         v += 25;
1476       }
1477     }
1478     break;
1479       /* block sizes larger then 5 by 5 are handled by BLAS */
1480     default: {
1481       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1482       if (!a->mult_work) {
1483         k = PetscMax(a->m,a->n);
1484         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1485         CHKPTRQ(a->mult_work);
1486       }
1487       work = a->mult_work;
1488       for ( i=0; i<mbs; i++ ) {
1489         n     = ii[1] - ii[0]; ii++;
1490         ncols = n*bs;
1491         PetscMemzero(work,ncols*sizeof(Scalar));
1492         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1493         v += n*bs2;
1494         x += bs;
1495         workt = work;
1496         for ( j=0; j<n; j++ ) {
1497           zb = z + bs*(*idx++);
1498           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1499           workt += bs;
1500         }
1501       }
1502     }
1503   }
1504   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1505   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1506   PLogFlops(2*a->nz*a->bs2);
1507   return 0;
1508 }
1509 
1510 #undef __FUNC__
1511 #define __FUNC__ "MatGetInfo_SeqBAIJ"
1512 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1513 {
1514   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1515 
1516   info->rows_global    = (double)a->m;
1517   info->columns_global = (double)a->n;
1518   info->rows_local     = (double)a->m;
1519   info->columns_local  = (double)a->n;
1520   info->block_size     = a->bs2;
1521   info->nz_allocated   = a->maxnz;
1522   info->nz_used        = a->bs2*a->nz;
1523   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1524   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1525     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1526   info->assemblies   = A->num_ass;
1527   info->mallocs      = a->reallocs;
1528   info->memory       = A->mem;
1529   if (A->factor) {
1530     info->fill_ratio_given  = A->info.fill_ratio_given;
1531     info->fill_ratio_needed = A->info.fill_ratio_needed;
1532     info->factor_mallocs    = A->info.factor_mallocs;
1533   } else {
1534     info->fill_ratio_given  = 0;
1535     info->fill_ratio_needed = 0;
1536     info->factor_mallocs    = 0;
1537   }
1538   return 0;
1539 }
1540 
1541 #undef __FUNC__
1542 #define __FUNC__ "MatEqual_SeqBAIJ"
1543 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1544 {
1545   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1546 
1547   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1548 
1549   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1550   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1551       (a->nz != b->nz)) {
1552     *flg = PETSC_FALSE; return 0;
1553   }
1554 
1555   /* if the a->i are the same */
1556   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1557     *flg = PETSC_FALSE; return 0;
1558   }
1559 
1560   /* if a->j are the same */
1561   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1562     *flg = PETSC_FALSE; return 0;
1563   }
1564 
1565   /* if a->a are the same */
1566   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1567     *flg = PETSC_FALSE; return 0;
1568   }
1569   *flg = PETSC_TRUE;
1570   return 0;
1571 
1572 }
1573 
1574 #undef __FUNC__
1575 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1576 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1577 {
1578   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1579   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1580   Scalar      *x, zero = 0.0,*aa,*aa_j;
1581 
1582   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1583   bs   = a->bs;
1584   aa   = a->a;
1585   ai   = a->i;
1586   aj   = a->j;
1587   ambs = a->mbs;
1588   bs2  = a->bs2;
1589 
1590   VecSet(&zero,v);
1591   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1592   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1593   for ( i=0; i<ambs; i++ ) {
1594     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1595       if (aj[j] == i) {
1596         row  = i*bs;
1597         aa_j = aa+j*bs2;
1598         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1599         break;
1600       }
1601     }
1602   }
1603   return 0;
1604 }
1605 
1606 #undef __FUNC__
1607 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1608 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1609 {
1610   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1611   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1612   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1613 
1614   ai  = a->i;
1615   aj  = a->j;
1616   aa  = a->a;
1617   m   = a->m;
1618   n   = a->n;
1619   bs  = a->bs;
1620   mbs = a->mbs;
1621   bs2 = a->bs2;
1622   if (ll) {
1623     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1624     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1625     for ( i=0; i<mbs; i++ ) { /* for each block row */
1626       M  = ai[i+1] - ai[i];
1627       li = l + i*bs;
1628       v  = aa + bs2*ai[i];
1629       for ( j=0; j<M; j++ ) { /* for each block */
1630         for ( k=0; k<bs2; k++ ) {
1631           (*v++) *= li[k%bs];
1632         }
1633       }
1634     }
1635   }
1636 
1637   if (rr) {
1638     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1639     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1640     for ( i=0; i<mbs; i++ ) { /* for each block row */
1641       M  = ai[i+1] - ai[i];
1642       v  = aa + bs2*ai[i];
1643       for ( j=0; j<M; j++ ) { /* for each block */
1644         ri = r + bs*aj[ai[i]+j];
1645         for ( k=0; k<bs; k++ ) {
1646           x = ri[k];
1647           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1648         }
1649       }
1650     }
1651   }
1652   return 0;
1653 }
1654 
1655 
1656 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1657 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1658 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1659 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1660 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1661 
1662 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1663 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1664 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1665 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1666 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1667 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1668 
1669 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1670 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1671 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1672 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1673 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1674 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1675 
1676 #undef __FUNC__
1677 #define __FUNC__ "MatNorm_SeqBAIJ"
1678 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1679 {
1680   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1681   Scalar      *v = a->a;
1682   double      sum = 0.0;
1683   int         i,nz=a->nz,bs2=a->bs2;
1684 
1685   if (type == NORM_FROBENIUS) {
1686     for (i=0; i< bs2*nz; i++ ) {
1687 #if defined(PETSC_COMPLEX)
1688       sum += real(conj(*v)*(*v)); v++;
1689 #else
1690       sum += (*v)*(*v); v++;
1691 #endif
1692     }
1693     *norm = sqrt(sum);
1694   }
1695   else {
1696     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1697   }
1698   return 0;
1699 }
1700 
1701 /*
1702      note: This can only work for identity for row and col. It would
1703    be good to check this and otherwise generate an error.
1704 */
1705 #undef __FUNC__
1706 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1707 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1708 {
1709   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1710   Mat         outA;
1711   int         ierr;
1712 
1713   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1714 
1715   outA          = inA;
1716   inA->factor   = FACTOR_LU;
1717   a->row        = row;
1718   a->col        = col;
1719 
1720   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1721 
1722   if (!a->diag) {
1723     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1724   }
1725   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1726   return 0;
1727 }
1728 
1729 #undef __FUNC__
1730 #define __FUNC__ "MatScale_SeqBAIJ"
1731 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1732 {
1733   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1734   int         one = 1, totalnz = a->bs2*a->nz;
1735   BLscal_( &totalnz, alpha, a->a, &one );
1736   PLogFlops(totalnz);
1737   return 0;
1738 }
1739 
1740 #undef __FUNC__
1741 #define __FUNC__ "MatGetValues_SeqBAIJ"
1742 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1743 {
1744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1745   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1746   int        *ai = a->i, *ailen = a->ilen;
1747   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1748   Scalar     *ap, *aa = a->a, zero = 0.0;
1749 
1750   for ( k=0; k<m; k++ ) { /* loop over rows */
1751     row  = im[k]; brow = row/bs;
1752     if (row < 0) SETERRQ(1,0,"Negative row");
1753     if (row >= a->m) SETERRQ(1,0,"Row too large");
1754     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1755     nrow = ailen[brow];
1756     for ( l=0; l<n; l++ ) { /* loop over columns */
1757       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1758       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1759       col  = in[l] ;
1760       bcol = col/bs;
1761       cidx = col%bs;
1762       ridx = row%bs;
1763       high = nrow;
1764       low  = 0; /* assume unsorted */
1765       while (high-low > 5) {
1766         t = (low+high)/2;
1767         if (rp[t] > bcol) high = t;
1768         else             low  = t;
1769       }
1770       for ( i=low; i<high; i++ ) {
1771         if (rp[i] > bcol) break;
1772         if (rp[i] == bcol) {
1773           *v++ = ap[bs2*i+bs*cidx+ridx];
1774           goto finished;
1775         }
1776       }
1777       *v++ = zero;
1778       finished:;
1779     }
1780   }
1781   return 0;
1782 }
1783 
1784 #undef __FUNC__
1785 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
1786 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1787 {
1788   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1789   *bs = baij->bs;
1790   return 0;
1791 }
1792 
1793 /* idx should be of length atlease bs */
1794 #undef __FUNC__
1795 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1796 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1797 {
1798   int i,row;
1799   row = idx[0];
1800   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1801 
1802   for ( i=1; i<bs; i++ ) {
1803     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1804   }
1805   *flg = PETSC_TRUE;
1806   return 0;
1807 }
1808 
1809 #undef __FUNC__
1810 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1811 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1812 {
1813   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1814   IS          is_local;
1815   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1816   PetscTruth  flg;
1817   Scalar      zero = 0.0,*aa;
1818 
1819   /* Make a copy of the IS and  sort it */
1820   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1821   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1822   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1823   ierr = ISSort(is_local); CHKERRQ(ierr);
1824   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1825 
1826   i = 0;
1827   while (i < is_n) {
1828     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1829     flg = PETSC_FALSE;
1830     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1831     if (flg) { /* There exists a block of rows to be Zerowed */
1832       baij->ilen[rows[i]/bs] = 0;
1833       i += bs;
1834     } else { /* Zero out only the requested row */
1835       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1836       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1837       for ( j=0; j<count; j++ ) {
1838         aa[0] = zero;
1839         aa+=bs;
1840       }
1841       i++;
1842     }
1843   }
1844   if (diag) {
1845     for ( j=0; j<is_n; j++ ) {
1846       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1847     }
1848   }
1849   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1850   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1851   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1852   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1853 
1854   return 0;
1855 }
1856 
1857 #undef __FUNC__
1858 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1859 int MatPrintHelp_SeqBAIJ(Mat A)
1860 {
1861   static int called = 0;
1862   MPI_Comm   comm = A->comm;
1863 
1864   if (called) return 0; else called = 1;
1865   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1866   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1867   return 0;
1868 }
1869 
1870 /* -------------------------------------------------------------------*/
1871 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1872        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1873        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1874        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1875        MatSolve_SeqBAIJ_N,0,
1876        0,0,
1877        MatLUFactor_SeqBAIJ,0,
1878        0,
1879        MatTranspose_SeqBAIJ,
1880        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1881        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1882        0,MatAssemblyEnd_SeqBAIJ,
1883        0,
1884        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1885        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1886        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1887        MatILUFactorSymbolic_SeqBAIJ,0,
1888        0,0,
1889        MatConvertSameType_SeqBAIJ,0,0,
1890        MatILUFactor_SeqBAIJ,0,0,
1891        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1892        MatGetValues_SeqBAIJ,0,
1893        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1894        0,0,0,MatGetBlockSize_SeqBAIJ,
1895        MatGetRowIJ_SeqBAIJ,
1896        MatRestoreRowIJ_SeqBAIJ,
1897        0,0,0,0,0,0,
1898        MatSetValuesBlocked_SeqBAIJ};
1899 
1900 #undef __FUNC__
1901 #define __FUNC__ "MatCreateSeqBAIJ"
1902 /*@C
1903    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1904    compressed row) format.  For good matrix assembly performance the
1905    user should preallocate the matrix storage by setting the parameter nz
1906    (or the array nzz).  By setting these parameters accurately, performance
1907    during matrix assembly can be increased by more than a factor of 50.
1908 
1909    Input Parameters:
1910 .  comm - MPI communicator, set to MPI_COMM_SELF
1911 .  bs - size of block
1912 .  m - number of rows
1913 .  n - number of columns
1914 .  nz - number of block nonzeros per block row (same for all rows)
1915 .  nzz - array containing the number of block nonzeros in the various block rows
1916          (possibly different for each block row) or PETSC_NULL
1917 
1918    Output Parameter:
1919 .  A - the matrix
1920 
1921    Options Database Keys:
1922 $    -mat_no_unroll - uses code that does not unroll the loops in the
1923 $                     block calculations (much solver)
1924 $    -mat_block_size - size of the blocks to use
1925 
1926    Notes:
1927    The block AIJ format is fully compatible with standard Fortran 77
1928    storage.  That is, the stored row and column indices can begin at
1929    either one (as in Fortran) or zero.  See the users' manual for details.
1930 
1931    Specify the preallocated storage with either nz or nnz (not both).
1932    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1933    allocation.  For additional details, see the users manual chapter on
1934    matrices.
1935 
1936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1937 @*/
1938 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1939 {
1940   Mat         B;
1941   Mat_SeqBAIJ *b;
1942   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1943 
1944   MPI_Comm_size(comm,&size);
1945   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
1946 
1947   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1948 
1949   if (mbs*bs!=m || nbs*bs!=n)
1950     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
1951 
1952   *A = 0;
1953   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1954   PLogObjectCreate(B);
1955   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1956   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1957   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1958   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1959   if (!flg) {
1960     switch (bs) {
1961       case 1:
1962         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1963         B->ops.solve           = MatSolve_SeqBAIJ_1;
1964         B->ops.mult            = MatMult_SeqBAIJ_1;
1965         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1966        break;
1967       case 2:
1968         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1969         B->ops.solve           = MatSolve_SeqBAIJ_2;
1970         B->ops.mult            = MatMult_SeqBAIJ_2;
1971         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1972         break;
1973       case 3:
1974         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1975         B->ops.solve           = MatSolve_SeqBAIJ_3;
1976         B->ops.mult            = MatMult_SeqBAIJ_3;
1977         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1978         break;
1979       case 4:
1980         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1981         B->ops.solve           = MatSolve_SeqBAIJ_4;
1982         B->ops.mult            = MatMult_SeqBAIJ_4;
1983         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1984         break;
1985       case 5:
1986         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1987         B->ops.solve           = MatSolve_SeqBAIJ_5;
1988         B->ops.mult            = MatMult_SeqBAIJ_5;
1989         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1990         break;
1991     }
1992   }
1993   B->destroy          = MatDestroy_SeqBAIJ;
1994   B->view             = MatView_SeqBAIJ;
1995   B->factor           = 0;
1996   B->lupivotthreshold = 1.0;
1997   B->mapping          = 0;
1998   b->row              = 0;
1999   b->col              = 0;
2000   b->reallocs         = 0;
2001 
2002   b->m       = m; B->m = m; B->M = m;
2003   b->n       = n; B->n = n; B->N = n;
2004   b->mbs     = mbs;
2005   b->nbs     = nbs;
2006   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2007   if (nnz == PETSC_NULL) {
2008     if (nz == PETSC_DEFAULT) nz = 5;
2009     else if (nz <= 0)        nz = 1;
2010     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2011     nz = nz*mbs;
2012   }
2013   else {
2014     nz = 0;
2015     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2016   }
2017 
2018   /* allocate the matrix space */
2019   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2020   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2021   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2022   b->j  = (int *) (b->a + nz*bs2);
2023   PetscMemzero(b->j,nz*sizeof(int));
2024   b->i  = b->j + nz;
2025   b->singlemalloc = 1;
2026 
2027   b->i[0] = 0;
2028   for (i=1; i<mbs+1; i++) {
2029     b->i[i] = b->i[i-1] + b->imax[i-1];
2030   }
2031 
2032   /* b->ilen will count nonzeros in each block row so far. */
2033   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2034   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2035   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2036 
2037   b->bs               = bs;
2038   b->bs2              = bs2;
2039   b->mbs              = mbs;
2040   b->nz               = 0;
2041   b->maxnz            = nz*bs2;
2042   b->sorted           = 0;
2043   b->roworiented      = 1;
2044   b->nonew            = 0;
2045   b->diag             = 0;
2046   b->solve_work       = 0;
2047   b->mult_work        = 0;
2048   b->spptr            = 0;
2049   B->info.nz_unneeded = (double)b->maxnz;
2050 
2051   *A = B;
2052   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2053   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2054   return 0;
2055 }
2056 
2057 #undef __FUNC__
2058 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2059 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2060 {
2061   Mat         C;
2062   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2063   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2064 
2065   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2066 
2067   *B = 0;
2068   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
2069   PLogObjectCreate(C);
2070   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2071   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2072   C->destroy    = MatDestroy_SeqBAIJ;
2073   C->view       = MatView_SeqBAIJ;
2074   C->factor     = A->factor;
2075   c->row        = 0;
2076   c->col        = 0;
2077   C->assembled  = PETSC_TRUE;
2078 
2079   c->m = C->m   = a->m;
2080   c->n = C->n   = a->n;
2081   C->M          = a->m;
2082   C->N          = a->n;
2083 
2084   c->bs         = a->bs;
2085   c->bs2        = a->bs2;
2086   c->mbs        = a->mbs;
2087   c->nbs        = a->nbs;
2088 
2089   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2090   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2091   for ( i=0; i<mbs; i++ ) {
2092     c->imax[i] = a->imax[i];
2093     c->ilen[i] = a->ilen[i];
2094   }
2095 
2096   /* allocate the matrix space */
2097   c->singlemalloc = 1;
2098   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2099   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2100   c->j  = (int *) (c->a + nz*bs2);
2101   c->i  = c->j + nz;
2102   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2103   if (mbs > 0) {
2104     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2105     if (cpvalues == COPY_VALUES) {
2106       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2107     }
2108   }
2109 
2110   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2111   c->sorted      = a->sorted;
2112   c->roworiented = a->roworiented;
2113   c->nonew       = a->nonew;
2114 
2115   if (a->diag) {
2116     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2117     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2118     for ( i=0; i<mbs; i++ ) {
2119       c->diag[i] = a->diag[i];
2120     }
2121   }
2122   else c->diag          = 0;
2123   c->nz                 = a->nz;
2124   c->maxnz              = a->maxnz;
2125   c->solve_work         = 0;
2126   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2127   c->mult_work          = 0;
2128   *B = C;
2129   return 0;
2130 }
2131 
2132 #undef __FUNC__
2133 #define __FUNC__ "MatLoad_SeqBAIJ"
2134 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2135 {
2136   Mat_SeqBAIJ  *a;
2137   Mat          B;
2138   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2139   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2140   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2141   int          *masked, nmask,tmp,bs2,ishift;
2142   Scalar       *aa;
2143   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2144 
2145   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2146   bs2  = bs*bs;
2147 
2148   MPI_Comm_size(comm,&size);
2149   if (size > 1) SETERRQ(1,0,"view must have one processor");
2150   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2151   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2152   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2153   M = header[1]; N = header[2]; nz = header[3];
2154 
2155   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2156 
2157   /*
2158      This code adds extra rows to make sure the number of rows is
2159     divisible by the blocksize
2160   */
2161   mbs        = M/bs;
2162   extra_rows = bs - M + bs*(mbs);
2163   if (extra_rows == bs) extra_rows = 0;
2164   else                  mbs++;
2165   if (extra_rows) {
2166     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2167   }
2168 
2169   /* read in row lengths */
2170   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2171   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2172   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2173 
2174   /* read in column indices */
2175   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2176   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2177   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2178 
2179   /* loop over row lengths determining block row lengths */
2180   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2181   PetscMemzero(browlengths,mbs*sizeof(int));
2182   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2183   PetscMemzero(mask,mbs*sizeof(int));
2184   masked = mask + mbs;
2185   rowcount = 0; nzcount = 0;
2186   for ( i=0; i<mbs; i++ ) {
2187     nmask = 0;
2188     for ( j=0; j<bs; j++ ) {
2189       kmax = rowlengths[rowcount];
2190       for ( k=0; k<kmax; k++ ) {
2191         tmp = jj[nzcount++]/bs;
2192         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2193       }
2194       rowcount++;
2195     }
2196     browlengths[i] += nmask;
2197     /* zero out the mask elements we set */
2198     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2199   }
2200 
2201   /* create our matrix */
2202   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2203          CHKERRQ(ierr);
2204   B = *A;
2205   a = (Mat_SeqBAIJ *) B->data;
2206 
2207   /* set matrix "i" values */
2208   a->i[0] = 0;
2209   for ( i=1; i<= mbs; i++ ) {
2210     a->i[i]      = a->i[i-1] + browlengths[i-1];
2211     a->ilen[i-1] = browlengths[i-1];
2212   }
2213   a->nz         = 0;
2214   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2215 
2216   /* read in nonzero values */
2217   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2218   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2219   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2220 
2221   /* set "a" and "j" values into matrix */
2222   nzcount = 0; jcount = 0;
2223   for ( i=0; i<mbs; i++ ) {
2224     nzcountb = nzcount;
2225     nmask    = 0;
2226     for ( j=0; j<bs; j++ ) {
2227       kmax = rowlengths[i*bs+j];
2228       for ( k=0; k<kmax; k++ ) {
2229         tmp = jj[nzcount++]/bs;
2230 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2231       }
2232       rowcount++;
2233     }
2234     /* sort the masked values */
2235     PetscSortInt(nmask,masked);
2236 
2237     /* set "j" values into matrix */
2238     maskcount = 1;
2239     for ( j=0; j<nmask; j++ ) {
2240       a->j[jcount++]  = masked[j];
2241       mask[masked[j]] = maskcount++;
2242     }
2243     /* set "a" values into matrix */
2244     ishift = bs2*a->i[i];
2245     for ( j=0; j<bs; j++ ) {
2246       kmax = rowlengths[i*bs+j];
2247       for ( k=0; k<kmax; k++ ) {
2248         tmp    = jj[nzcountb]/bs ;
2249         block  = mask[tmp] - 1;
2250         point  = jj[nzcountb] - bs*tmp;
2251         idx    = ishift + bs2*block + j + bs*point;
2252         a->a[idx] = aa[nzcountb++];
2253       }
2254     }
2255     /* zero out the mask elements we set */
2256     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2257   }
2258   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2259 
2260   PetscFree(rowlengths);
2261   PetscFree(browlengths);
2262   PetscFree(aa);
2263   PetscFree(jj);
2264   PetscFree(mask);
2265 
2266   B->assembled = PETSC_TRUE;
2267 
2268   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2269   if (flg) {
2270     Viewer tviewer;
2271     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2272     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2273     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2274     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2275   }
2276   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2277   if (flg) {
2278     Viewer tviewer;
2279     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2280     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2281     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2282     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2283   }
2284   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2285   if (flg) {
2286     Viewer tviewer;
2287     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2288     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2289     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2290   }
2291   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2292   if (flg) {
2293     Viewer tviewer;
2294     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2295     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2296     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2297     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2298   }
2299   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2300   if (flg) {
2301     Viewer tviewer;
2302     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2303     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2304     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2305     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2306   }
2307   return 0;
2308 }
2309 
2310 
2311 
2312