xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8a36c062a42118aeb7a8c1f4af6ee2dc0f5436fa)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.103 1997/06/03 20:26:37 balay Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 
10 #include "pinclude/pviewer.h"
11 #include "sys.h"
12 #include "src/mat/impls/baij/seq/baij.h"
13 #include "src/vec/vecimpl.h"
14 #include "src/inline/spops.h"
15 #include "petsc.h"
16 
17 
18 /*
19      Adds diagonal pointers to sparse matrix structure.
20 */
21 
22 #undef __FUNC__
23 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24 int MatMarkDiag_SeqBAIJ(Mat A)
25 {
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
27   int         i,j, *diag, m = a->mbs;
28 
29   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30   PLogObjectMemory(A,(m+1)*sizeof(int));
31   for ( i=0; i<m; i++ ) {
32     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33       if (a->j[j] == i) {
34         diag[i] = j;
35         break;
36       }
37     }
38   }
39   a->diag = diag;
40   return 0;
41 }
42 
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert1;
418         }
419       }
420       if (nonew == 1) goto noinsert1;
421       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
422       if (nrow >= rmax) {
423         /* there is no extra room in row, therefore enlarge */
424         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
425         Scalar *new_a;
426 
427         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
428 
429         /* Malloc new storage space */
430         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
431         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
432         new_j   = (int *) (new_a + bs2*new_nz);
433         new_i   = new_j + new_nz;
434 
435         /* copy over old data into new slots */
436         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
437         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
438         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
439         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
440         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
441                                                            len*sizeof(int));
442         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
443         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
444         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
445                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
446         /* free up old matrix storage */
447         PetscFree(a->a);
448         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
449         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
450         a->singlemalloc = 1;
451 
452         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
453         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
454         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
455         a->maxnz += bs2*CHUNKSIZE;
456         a->reallocs++;
457         a->nz++;
458       }
459       N = nrow++ - 1;
460       /* shift up all the later entries in this row */
461       for ( ii=N; ii>=i; ii-- ) {
462         rp[ii+1] = rp[ii];
463         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
464       }
465       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
466       rp[i]                      = bcol;
467       ap[bs2*i + bs*cidx + ridx] = value;
468       noinsert1:;
469       low = i;
470     }
471     ailen[brow] = nrow;
472   }
473   return 0;
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
482   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
483   int         roworiented=a->roworiented;
484   int         *aj=a->j,nonew=a->nonew;
485   int          bs2=a->bs2,bs=a->bs,stepval;
486   Scalar      *ap,*value=v,*aa=a->a,*bap;
487 
488   if (roworiented) {
489     stepval = (n-1)*bs;
490   } else {
491     stepval = (m-1)*bs;
492   }
493   for ( k=0; k<m; k++ ) { /* loop over added rows */
494     row  = im[k];
495 #if defined(PETSC_BOPT_g)
496     if (row < 0) SETERRQ(1,0,"Negative row");
497     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
498 #endif
499     rp   = aj + ai[row];
500     ap   = aa + bs2*ai[row];
501     rmax = imax[row];
502     nrow = ailen[row];
503     low  = 0;
504     for ( l=0; l<n; l++ ) { /* loop over added columns */
505 #if defined(PETSC_BOPT_g)
506       if (in[l] < 0) SETERRQ(1,0,"Negative column");
507       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
508 #endif
509       col = in[l];
510       if (roworiented) {
511         value = v + k*(stepval+bs)*bs + l*bs;
512       } else {
513         value = v + l*(stepval+bs)*bs + k*bs;
514       }
515       if (!sorted) low = 0; high = nrow;
516       while (high-low > 7) {
517         t = (low+high)/2;
518         if (rp[t] > col) high = t;
519         else             low  = t;
520       }
521       for ( i=low; i<high; i++ ) {
522         if (rp[i] > col) break;
523         if (rp[i] == col) {
524           bap  = ap +  bs2*i;
525           if (roworiented) {
526             if (is == ADD_VALUES) {
527               for ( ii=0; ii<bs; ii++,value+=stepval )
528                 for (jj=ii; jj<bs2; jj+=bs )
529                   bap[jj] += *value++;
530             } else {
531               for ( ii=0; ii<bs; ii++,value+=stepval )
532                 for (jj=ii; jj<bs2; jj+=bs )
533                   bap[jj] = *value++;
534             }
535           } else {
536             if (is == ADD_VALUES) {
537               for ( ii=0; ii<bs; ii++,value+=stepval )
538                 for (jj=0; jj<bs; jj++ )
539                   *bap++ += *value++;
540             } else {
541               for ( ii=0; ii<bs; ii++,value+=stepval )
542                 for (jj=0; jj<bs; jj++ )
543                   *bap++  = *value++;
544             }
545           }
546           goto noinsert2;
547         }
548       }
549       if (nonew == 1) goto noinsert2;
550       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
551       if (nrow >= rmax) {
552         /* there is no extra room in row, therefore enlarge */
553         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
554         Scalar *new_a;
555 
556         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
557 
558         /* malloc new storage space */
559         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
560         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
561         new_j   = (int *) (new_a + bs2*new_nz);
562         new_i   = new_j + new_nz;
563 
564         /* copy over old data into new slots */
565         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
566         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
567         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
568         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
569         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
570                                                            len*sizeof(int));
571         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
572         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
573         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
574                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
575         /* free up old matrix storage */
576         PetscFree(a->a);
577         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
578         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
579         a->singlemalloc = 1;
580 
581         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
582         rmax = imax[row] = imax[row] + CHUNKSIZE;
583         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
584         a->maxnz += bs2*CHUNKSIZE;
585         a->reallocs++;
586         a->nz++;
587       }
588       N = nrow++ - 1;
589       /* shift up all the later entries in this row */
590       for ( ii=N; ii>=i; ii-- ) {
591         rp[ii+1] = rp[ii];
592         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
593       }
594       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
595       rp[i] = col;
596       bap   = ap +  bs2*i;
597       if (roworiented) {
598         for ( ii=0; ii<bs; ii++,value+=stepval)
599           for (jj=ii; jj<bs2; jj+=bs )
600             bap[jj] = *value++;
601       } else {
602         for ( ii=0; ii<bs; ii++,value+=stepval )
603           for (jj=0; jj<bs; jj++ )
604             *bap++  = *value++;
605       }
606       noinsert2:;
607       low = i;
608     }
609     ailen[row] = nrow;
610   }
611   return 0;
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
616 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
617 {
618   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
619   *m = a->m; *n = a->n;
620   return 0;
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
625 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
626 {
627   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
628   *m = 0; *n = a->m;
629   return 0;
630 }
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatGetRow_SeqBAIJ"
634 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
635 {
636   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
637   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
638   Scalar      *aa,*v_i,*aa_i;
639 
640   bs  = a->bs;
641   ai  = a->i;
642   aj  = a->j;
643   aa  = a->a;
644   bs2 = a->bs2;
645 
646   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
647 
648   bn  = row/bs;   /* Block number */
649   bp  = row % bs; /* Block Position */
650   M   = ai[bn+1] - ai[bn];
651   *nz = bs*M;
652 
653   if (v) {
654     *v = 0;
655     if (*nz) {
656       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
657       for ( i=0; i<M; i++ ) { /* for each block in the block row */
658         v_i  = *v + i*bs;
659         aa_i = aa + bs2*(ai[bn] + i);
660         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
661       }
662     }
663   }
664 
665   if (idx) {
666     *idx = 0;
667     if (*nz) {
668       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
669       for ( i=0; i<M; i++ ) { /* for each block in the block row */
670         idx_i = *idx + i*bs;
671         itmp  = bs*aj[ai[bn] + i];
672         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
673       }
674     }
675   }
676   return 0;
677 }
678 
679 #undef __FUNC__
680 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
681 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
682 {
683   if (idx) {if (*idx) PetscFree(*idx);}
684   if (v)   {if (*v)   PetscFree(*v);}
685   return 0;
686 }
687 
688 #undef __FUNC__
689 #define __FUNC__ "MatTranspose_SeqBAIJ"
690 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
691 {
692   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
693   Mat         C;
694   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
695   int         *rows,*cols,bs2=a->bs2;
696   Scalar      *array=a->a;
697 
698   if (B==PETSC_NULL && mbs!=nbs)
699     SETERRQ(1,0,"Square matrix only for in-place");
700   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
701   PetscMemzero(col,(1+nbs)*sizeof(int));
702 
703   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
704   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
705   PetscFree(col);
706   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
707   cols = rows + bs;
708   for ( i=0; i<mbs; i++ ) {
709     cols[0] = i*bs;
710     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
711     len = ai[i+1] - ai[i];
712     for ( j=0; j<len; j++ ) {
713       rows[0] = (*aj++)*bs;
714       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
715       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
716       array += bs2;
717     }
718   }
719   PetscFree(rows);
720 
721   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
722   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
723 
724   if (B != PETSC_NULL) {
725     *B = C;
726   } else {
727     /* This isn't really an in-place transpose */
728     PetscFree(a->a);
729     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
730     if (a->diag) PetscFree(a->diag);
731     if (a->ilen) PetscFree(a->ilen);
732     if (a->imax) PetscFree(a->imax);
733     if (a->solve_work) PetscFree(a->solve_work);
734     PetscFree(a);
735     PetscMemcpy(A,C,sizeof(struct _p_Mat));
736     PetscHeaderDestroy(C);
737   }
738   return 0;
739 }
740 
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
744 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
745 {
746   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
747   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
748   int        m = a->m,*ip, N, *ailen = a->ilen;
749   int        mbs = a->mbs, bs2 = a->bs2,rmax;
750   Scalar     *aa = a->a, *ap;
751 
752   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
753 
754   rmax = ailen[0];
755   for ( i=1; i<mbs; i++ ) {
756     /* move each row back by the amount of empty slots (fshift) before it*/
757     fshift += imax[i-1] - ailen[i-1];
758     rmax   = PetscMax(rmax,ailen[i]);
759     if (fshift) {
760       ip = aj + ai[i]; ap = aa + bs2*ai[i];
761       N = ailen[i];
762       for ( j=0; j<N; j++ ) {
763         ip[j-fshift] = ip[j];
764         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
765       }
766     }
767     ai[i] = ai[i-1] + ailen[i-1];
768   }
769   if (mbs) {
770     fshift += imax[mbs-1] - ailen[mbs-1];
771     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
772   }
773   /* reset ilen and imax for each row */
774   for ( i=0; i<mbs; i++ ) {
775     ailen[i] = imax[i] = ai[i+1] - ai[i];
776   }
777   a->nz = ai[mbs];
778 
779   /* diagonals may have moved, so kill the diagonal pointers */
780   if (fshift && a->diag) {
781     PetscFree(a->diag);
782     PLogObjectMemory(A,-(m+1)*sizeof(int));
783     a->diag = 0;
784   }
785   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
786            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
787   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
788            a->reallocs);
789   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
790   a->reallocs          = 0;
791   A->info.nz_unneeded  = (double)fshift*bs2;
792 
793   return 0;
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
798 int MatZeroEntries_SeqBAIJ(Mat A)
799 {
800   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
801   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
802   return 0;
803 }
804 
805 #undef __FUNC__
806 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
807 int MatDestroy_SeqBAIJ(PetscObject obj)
808 {
809   Mat         A  = (Mat) obj;
810   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
811 
812 #if defined(PETSC_LOG)
813   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
814 #endif
815   PetscFree(a->a);
816   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
817   if (a->diag) PetscFree(a->diag);
818   if (a->ilen) PetscFree(a->ilen);
819   if (a->imax) PetscFree(a->imax);
820   if (a->solve_work) PetscFree(a->solve_work);
821   if (a->mult_work) PetscFree(a->mult_work);
822   PetscFree(a);
823   PLogObjectDestroy(A);
824   PetscHeaderDestroy(A);
825   return 0;
826 }
827 
828 #undef __FUNC__
829 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
830 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
831 {
832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
833   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
834   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
835   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
836   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
837   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
838   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
839   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
840   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
841   else if (op == MAT_ROWS_SORTED ||
842            op == MAT_ROWS_UNSORTED ||
843            op == MAT_SYMMETRIC ||
844            op == MAT_STRUCTURALLY_SYMMETRIC ||
845            op == MAT_YES_NEW_DIAGONALS ||
846            op == MAT_IGNORE_OFF_PROC_ENTRIES)
847     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
848   else if (op == MAT_NO_NEW_DIAGONALS)
849     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
850   else
851     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
852   return 0;
853 }
854 
855 
856 /* -------------------------------------------------------*/
857 /* Should check that shapes of vectors and matrices match */
858 /* -------------------------------------------------------*/
859 #include "pinclude/plapack.h"
860 
861 #undef __FUNC__
862 #define __FUNC__ "MatMult_SeqBAIJ_1"
863 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
864 {
865   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
866   register Scalar *x,*z,*v,sum;
867   int             mbs=a->mbs,i,*idx,*ii,n;
868 
869   VecGetArray_Fast(xx,x);
870   VecGetArray_Fast(zz,z);
871 
872   idx   = a->j;
873   v     = a->a;
874   ii    = a->i;
875 
876   for ( i=0; i<mbs; i++ ) {
877     n    = ii[1] - ii[0]; ii++;
878     sum  = 0.0;
879     while (n--) sum += *v++ * x[*idx++];
880     z[i] = sum;
881   }
882   VecRestoreArray_Fast(xx,x);
883   VecRestoreArray_Fast(zz,z);
884   PLogFlops(2*a->nz - a->m);
885   return 0;
886 }
887 
888 #undef __FUNC__
889 #define __FUNC__ "MatMult_SeqBAIJ_2"
890 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
891 {
892   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
893   register Scalar *x,*z,*v,*xb,sum1,sum2;
894   register Scalar x1,x2;
895   int             mbs=a->mbs,i,*idx,*ii;
896   int             j,n;
897 
898   VecGetArray_Fast(xx,x);
899   VecGetArray_Fast(zz,z);
900 
901   idx   = a->j;
902   v     = a->a;
903   ii    = a->i;
904 
905   for ( i=0; i<mbs; i++ ) {
906     n  = ii[1] - ii[0]; ii++;
907     sum1 = 0.0; sum2 = 0.0;
908     for ( j=0; j<n; j++ ) {
909       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
910       sum1 += v[0]*x1 + v[2]*x2;
911       sum2 += v[1]*x1 + v[3]*x2;
912       v += 4;
913     }
914     z[0] = sum1; z[1] = sum2;
915     z += 2;
916   }
917   VecRestoreArray_Fast(xx,x);
918   VecRestoreArray_Fast(zz,z);
919   PLogFlops(4*a->nz - a->m);
920   return 0;
921 }
922 
923 #undef __FUNC__
924 #define __FUNC__ "MatMult_SeqBAIJ_3"
925 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
926 {
927   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
928   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
929   int             mbs=a->mbs,i,*idx,*ii,j,n;
930 
931   VecGetArray_Fast(xx,x);
932   VecGetArray_Fast(zz,z);
933 
934   idx   = a->j;
935   v     = a->a;
936   ii    = a->i;
937 
938   for ( i=0; i<mbs; i++ ) {
939     n  = ii[1] - ii[0]; ii++;
940     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
941     for ( j=0; j<n; j++ ) {
942       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
943       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
944       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
945       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
946       v += 9;
947     }
948     z[0] = sum1; z[1] = sum2; z[2] = sum3;
949     z += 3;
950   }
951   VecRestoreArray_Fast(xx,x);
952   VecRestoreArray_Fast(zz,z);
953   PLogFlops(18*a->nz - a->m);
954   return 0;
955 }
956 
957 #undef __FUNC__
958 #define __FUNC__ "MatMult_SeqBAIJ_4"
959 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
960 {
961   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
962   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
963   register Scalar x1,x2,x3,x4;
964   int             mbs=a->mbs,i,*idx,*ii;
965   int             j,n;
966 
967   VecGetArray_Fast(xx,x);
968   VecGetArray_Fast(zz,z);
969 
970   idx   = a->j;
971   v     = a->a;
972   ii    = a->i;
973 
974   for ( i=0; i<mbs; i++ ) {
975     n  = ii[1] - ii[0]; ii++;
976     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
977     for ( j=0; j<n; j++ ) {
978       xb = x + 4*(*idx++);
979       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
980       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
981       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
982       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
983       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
984       v += 16;
985     }
986     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
987     z += 4;
988   }
989   VecRestoreArray_Fast(xx,x);
990   VecRestoreArray_Fast(zz,z);
991   PLogFlops(32*a->nz - a->m);
992   return 0;
993 }
994 
995 #undef __FUNC__
996 #define __FUNC__ "MatMult_SeqBAIJ_5"
997 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
998 {
999   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1000   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1001   register Scalar x1,x2,x3,x4,x5;
1002   int             mbs=a->mbs,i,*idx,*ii,j,n;
1003 
1004   VecGetArray_Fast(xx,x);
1005   VecGetArray_Fast(zz,z);
1006 
1007   idx   = a->j;
1008   v     = a->a;
1009   ii    = a->i;
1010 
1011   for ( i=0; i<mbs; i++ ) {
1012     n  = ii[1] - ii[0]; ii++;
1013     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1014     for ( j=0; j<n; j++ ) {
1015       xb = x + 5*(*idx++);
1016       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1017       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1018       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1019       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1020       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1021       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1022       v += 25;
1023     }
1024     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1025     z += 5;
1026   }
1027   VecRestoreArray_Fast(xx,x);
1028   VecRestoreArray_Fast(zz,z);
1029   PLogFlops(50*a->nz - a->m);
1030   return 0;
1031 }
1032 
1033 #undef __FUNC__
1034 #define __FUNC__ "MatMult_SeqBAIJ_7"
1035 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1036 {
1037   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1038   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1039   register Scalar x1,x2,x3,x4,x5,x6,x7;
1040   int             mbs=a->mbs,i,*idx,*ii,j,n;
1041 
1042   VecGetArray_Fast(xx,x);
1043   VecGetArray_Fast(zz,z);
1044 
1045   idx   = a->j;
1046   v     = a->a;
1047   ii    = a->i;
1048 
1049   for ( i=0; i<mbs; i++ ) {
1050     n  = ii[1] - ii[0]; ii++;
1051     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1052     for ( j=0; j<n; j++ ) {
1053       xb = x + 7*(*idx++);
1054       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1055       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1056       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1057       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1058       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1059       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1060       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1061       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1062       v += 49;
1063     }
1064     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1065     z += 7;
1066   }
1067 
1068   VecRestoreArray_Fast(xx,x);
1069   VecRestoreArray_Fast(zz,z);
1070   PLogFlops(98*a->nz - a->m);
1071   return 0;
1072 }
1073 
1074 #undef __FUNC__
1075 #define __FUNC__ "MatMult_SeqBAIJ_N"
1076 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1077 {
1078   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1079   register Scalar *x,*z,*v,*xb;
1080   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1081   int             _One = 1,ncols,k;
1082   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1083 
1084   VecGetArray_Fast(xx,x);
1085   VecGetArray_Fast(zz,z);
1086 
1087   idx   = a->j;
1088   v     = a->a;
1089   ii    = a->i;
1090 
1091 
1092   if (!a->mult_work) {
1093     k = PetscMax(a->m,a->n);
1094     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1095   }
1096   work = a->mult_work;
1097   for ( i=0; i<mbs; i++ ) {
1098     n     = ii[1] - ii[0]; ii++;
1099     ncols = n*bs;
1100     workt = work;
1101     for ( j=0; j<n; j++ ) {
1102       xb = x + bs*(*idx++);
1103       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1104       workt += bs;
1105     }
1106     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1107     v += n*bs2;
1108     z += bs;
1109   }
1110   VecRestoreArray_Fast(xx,x);
1111   VecRestoreArray_Fast(zz,z);
1112   PLogFlops(2*a->nz*bs2 - a->m);
1113   return 0;
1114 }
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1118 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1119 {
1120   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1121   register Scalar *x,*y,*z,*v,sum;
1122   int             mbs=a->mbs,i,*idx,*ii,n;
1123 
1124   VecGetArray_Fast(xx,x);
1125   VecGetArray_Fast(yy,y);
1126   VecGetArray_Fast(zz,z);
1127 
1128   idx   = a->j;
1129   v     = a->a;
1130   ii    = a->i;
1131 
1132   for ( i=0; i<mbs; i++ ) {
1133     n    = ii[1] - ii[0]; ii++;
1134     sum  = y[i];
1135     while (n--) sum += *v++ * x[*idx++];
1136     z[i] = sum;
1137   }
1138   VecRestoreArray_Fast(xx,x);
1139   VecRestoreArray_Fast(yy,y);
1140   VecRestoreArray_Fast(zz,z);
1141   PLogFlops(2*a->nz);
1142   return 0;
1143 }
1144 
1145 #undef __FUNC__
1146 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1147 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1150   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1151   register Scalar x1,x2;
1152   int             mbs=a->mbs,i,*idx,*ii;
1153   int             j,n;
1154 
1155   VecGetArray_Fast(xx,x);
1156   VecGetArray_Fast(yy,y);
1157   VecGetArray_Fast(zz,z);
1158 
1159   idx   = a->j;
1160   v     = a->a;
1161   ii    = a->i;
1162 
1163   for ( i=0; i<mbs; i++ ) {
1164     n  = ii[1] - ii[0]; ii++;
1165     sum1 = y[0]; sum2 = y[1];
1166     for ( j=0; j<n; j++ ) {
1167       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1168       sum1 += v[0]*x1 + v[2]*x2;
1169       sum2 += v[1]*x1 + v[3]*x2;
1170       v += 4;
1171     }
1172     z[0] = sum1; z[1] = sum2;
1173     z += 2; y += 2;
1174   }
1175   VecRestoreArray_Fast(xx,x);
1176   VecRestoreArray_Fast(yy,y);
1177   VecRestoreArray_Fast(zz,z);
1178   PLogFlops(4*a->nz);
1179   return 0;
1180 }
1181 
1182 #undef __FUNC__
1183 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1184 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1185 {
1186   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1187   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1188   int             mbs=a->mbs,i,*idx,*ii,j,n;
1189 
1190   VecGetArray_Fast(xx,x);
1191   VecGetArray_Fast(yy,y);
1192   VecGetArray_Fast(zz,z);
1193 
1194   idx   = a->j;
1195   v     = a->a;
1196   ii    = a->i;
1197 
1198   for ( i=0; i<mbs; i++ ) {
1199     n  = ii[1] - ii[0]; ii++;
1200     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1201     for ( j=0; j<n; j++ ) {
1202       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1203       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1204       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1205       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1206       v += 9;
1207     }
1208     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1209     z += 3; y += 3;
1210   }
1211   VecRestoreArray_Fast(xx,x);
1212   VecRestoreArray_Fast(yy,y);
1213   VecRestoreArray_Fast(zz,z);
1214   PLogFlops(18*a->nz);
1215   return 0;
1216 }
1217 
1218 #undef __FUNC__
1219 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1220 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1223   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1224   register Scalar x1,x2,x3,x4;
1225   int             mbs=a->mbs,i,*idx,*ii;
1226   int             j,n;
1227 
1228   VecGetArray_Fast(xx,x);
1229   VecGetArray_Fast(yy,y);
1230   VecGetArray_Fast(zz,z);
1231 
1232   idx   = a->j;
1233   v     = a->a;
1234   ii    = a->i;
1235 
1236   for ( i=0; i<mbs; i++ ) {
1237     n  = ii[1] - ii[0]; ii++;
1238     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1239     for ( j=0; j<n; j++ ) {
1240       xb = x + 4*(*idx++);
1241       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1242       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1243       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1244       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1245       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1246       v += 16;
1247     }
1248     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1249     z += 4; y += 4;
1250   }
1251   VecRestoreArray_Fast(xx,x);
1252   VecRestoreArray_Fast(yy,y);
1253   VecRestoreArray_Fast(zz,z);
1254   PLogFlops(32*a->nz);
1255   return 0;
1256 }
1257 
1258 #undef __FUNC__
1259 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1260 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1263   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1264   register Scalar x1,x2,x3,x4,x5;
1265   int             mbs=a->mbs,i,*idx,*ii,j,n;
1266 
1267   VecGetArray_Fast(xx,x);
1268   VecGetArray_Fast(yy,y);
1269   VecGetArray_Fast(zz,z);
1270 
1271   idx   = a->j;
1272   v     = a->a;
1273   ii    = a->i;
1274 
1275   for ( i=0; i<mbs; i++ ) {
1276     n  = ii[1] - ii[0]; ii++;
1277     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1278     for ( j=0; j<n; j++ ) {
1279       xb = x + 5*(*idx++);
1280       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1281       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1282       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1283       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1284       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1285       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1286       v += 25;
1287     }
1288     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1289     z += 5; y += 5;
1290   }
1291   VecRestoreArray_Fast(xx,x);
1292   VecRestoreArray_Fast(yy,y);
1293   VecRestoreArray_Fast(zz,z);
1294   PLogFlops(50*a->nz);
1295   return 0;
1296 }
1297 
1298 #undef __FUNC__
1299 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1300 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1301 {
1302   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1303   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1304   register Scalar x1,x2,x3,x4,x5,x6,x7;
1305   int             mbs=a->mbs,i,*idx,*ii,j,n;
1306 
1307   VecGetArray_Fast(xx,x);
1308   VecGetArray_Fast(yy,y);
1309   VecGetArray_Fast(zz,z);
1310 
1311   idx   = a->j;
1312   v     = a->a;
1313   ii    = a->i;
1314 
1315   for ( i=0; i<mbs; i++ ) {
1316     n  = ii[1] - ii[0]; ii++;
1317     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1318     for ( j=0; j<n; j++ ) {
1319       xb = x + 7*(*idx++);
1320       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1321       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1322       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1323       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1324       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1325       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1326       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1327       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1328       v += 49;
1329     }
1330     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1331     z += 7; y += 7;
1332   }
1333   VecRestoreArray_Fast(xx,x);
1334   VecRestoreArray_Fast(yy,y);
1335   VecRestoreArray_Fast(zz,z);
1336   PLogFlops(98*a->nz);
1337   return 0;
1338 }
1339 
1340 
1341 #undef __FUNC__
1342 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1343 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1344 {
1345   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1346   register Scalar *x,*z,*v,*xb;
1347   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1348   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1349 
1350   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1351 
1352   VecGetArray_Fast(xx,x);
1353   VecGetArray_Fast(zz,z);
1354 
1355   idx   = a->j;
1356   v     = a->a;
1357   ii    = a->i;
1358 
1359 
1360   if (!a->mult_work) {
1361     k = PetscMax(a->m,a->n);
1362     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1363   }
1364   work = a->mult_work;
1365   for ( i=0; i<mbs; i++ ) {
1366     n     = ii[1] - ii[0]; ii++;
1367     ncols = n*bs;
1368     workt = work;
1369     for ( j=0; j<n; j++ ) {
1370       xb = x + bs*(*idx++);
1371       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1372       workt += bs;
1373     }
1374     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1375     v += n*bs2;
1376     z += bs;
1377   }
1378   VecRestoreArray_Fast(xx,x);
1379   VecRestoreArray_Fast(zz,z);
1380   PLogFlops(2*a->nz*bs2 );
1381   return 0;
1382 }
1383 
1384 #undef __FUNC__
1385 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1386 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1387 {
1388   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1389   Scalar          *xg,*zg,*zb;
1390   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1391   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1392   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1393 
1394 
1395   VecGetArray_Fast(xx,xg); x = xg;
1396   VecGetArray_Fast(zz,zg); z = zg;
1397   PetscMemzero(z,N*sizeof(Scalar));
1398 
1399   idx   = a->j;
1400   v     = a->a;
1401   ii    = a->i;
1402 
1403   switch (bs) {
1404   case 1:
1405     for ( i=0; i<mbs; i++ ) {
1406       n  = ii[1] - ii[0]; ii++;
1407       xb = x + i; x1 = xb[0];
1408       ib = idx + ai[i];
1409       for ( j=0; j<n; j++ ) {
1410         rval    = ib[j];
1411         z[rval] += *v++ * x1;
1412       }
1413     }
1414     break;
1415   case 2:
1416     for ( i=0; i<mbs; i++ ) {
1417       n  = ii[1] - ii[0]; ii++;
1418       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1419       ib = idx + ai[i];
1420       for ( j=0; j<n; j++ ) {
1421         rval      = ib[j]*2;
1422         z[rval++] += v[0]*x1 + v[1]*x2;
1423         z[rval++] += v[2]*x1 + v[3]*x2;
1424         v += 4;
1425       }
1426     }
1427     break;
1428   case 3:
1429     for ( i=0; i<mbs; i++ ) {
1430       n  = ii[1] - ii[0]; ii++;
1431       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1432       ib = idx + ai[i];
1433       for ( j=0; j<n; j++ ) {
1434         rval      = ib[j]*3;
1435         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1436         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1437         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1438         v += 9;
1439       }
1440     }
1441     break;
1442   case 4:
1443     for ( i=0; i<mbs; i++ ) {
1444       n  = ii[1] - ii[0]; ii++;
1445       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1446       ib = idx + ai[i];
1447       for ( j=0; j<n; j++ ) {
1448         rval      = ib[j]*4;
1449         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1450         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1451         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1452         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1453         v += 16;
1454       }
1455     }
1456     break;
1457   case 5:
1458     for ( i=0; i<mbs; i++ ) {
1459       n  = ii[1] - ii[0]; ii++;
1460       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1461       x4 = xb[3];   x5 = xb[4];
1462       ib = idx + ai[i];
1463       for ( j=0; j<n; j++ ) {
1464         rval      = ib[j]*5;
1465         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1466         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1467         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1468         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1469         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1470         v += 25;
1471       }
1472     }
1473     break;
1474       /* block sizes larger then 5 by 5 are handled by BLAS */
1475     default: {
1476       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1477       if (!a->mult_work) {
1478         k = PetscMax(a->m,a->n);
1479         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1480         CHKPTRQ(a->mult_work);
1481       }
1482       work = a->mult_work;
1483       for ( i=0; i<mbs; i++ ) {
1484         n     = ii[1] - ii[0]; ii++;
1485         ncols = n*bs;
1486         PetscMemzero(work,ncols*sizeof(Scalar));
1487         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1488         v += n*bs2;
1489         x += bs;
1490         workt = work;
1491         for ( j=0; j<n; j++ ) {
1492           zb = z + bs*(*idx++);
1493           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1494           workt += bs;
1495         }
1496       }
1497     }
1498   }
1499   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1500   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1501   PLogFlops(2*a->nz*a->bs2 - a->n);
1502   return 0;
1503 }
1504 
1505 #undef __FUNC__
1506 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1507 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1508 {
1509   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1510   Scalar          *xg,*zg,*zb;
1511   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1512   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1513   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1514 
1515 
1516 
1517   VecGetArray_Fast(xx,xg); x = xg;
1518   VecGetArray_Fast(zz,zg); z = zg;
1519 
1520   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1521   else PetscMemzero(z,N*sizeof(Scalar));
1522 
1523 
1524   idx   = a->j;
1525   v     = a->a;
1526   ii    = a->i;
1527 
1528   switch (bs) {
1529   case 1:
1530     for ( i=0; i<mbs; i++ ) {
1531       n  = ii[1] - ii[0]; ii++;
1532       xb = x + i; x1 = xb[0];
1533       ib = idx + ai[i];
1534       for ( j=0; j<n; j++ ) {
1535         rval    = ib[j];
1536         z[rval] += *v++ * x1;
1537       }
1538     }
1539     break;
1540   case 2:
1541     for ( i=0; i<mbs; i++ ) {
1542       n  = ii[1] - ii[0]; ii++;
1543       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1544       ib = idx + ai[i];
1545       for ( j=0; j<n; j++ ) {
1546         rval      = ib[j]*2;
1547         z[rval++] += v[0]*x1 + v[1]*x2;
1548         z[rval++] += v[2]*x1 + v[3]*x2;
1549         v += 4;
1550       }
1551     }
1552     break;
1553   case 3:
1554     for ( i=0; i<mbs; i++ ) {
1555       n  = ii[1] - ii[0]; ii++;
1556       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1557       ib = idx + ai[i];
1558       for ( j=0; j<n; j++ ) {
1559         rval      = ib[j]*3;
1560         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1561         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1562         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1563         v += 9;
1564       }
1565     }
1566     break;
1567   case 4:
1568     for ( i=0; i<mbs; i++ ) {
1569       n  = ii[1] - ii[0]; ii++;
1570       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1571       ib = idx + ai[i];
1572       for ( j=0; j<n; j++ ) {
1573         rval      = ib[j]*4;
1574         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1575         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1576         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1577         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1578         v += 16;
1579       }
1580     }
1581     break;
1582   case 5:
1583     for ( i=0; i<mbs; i++ ) {
1584       n  = ii[1] - ii[0]; ii++;
1585       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1586       x4 = xb[3];   x5 = xb[4];
1587       ib = idx + ai[i];
1588       for ( j=0; j<n; j++ ) {
1589         rval      = ib[j]*5;
1590         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1591         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1592         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1593         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1594         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1595         v += 25;
1596       }
1597     }
1598     break;
1599       /* block sizes larger then 5 by 5 are handled by BLAS */
1600     default: {
1601       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1602       if (!a->mult_work) {
1603         k = PetscMax(a->m,a->n);
1604         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1605         CHKPTRQ(a->mult_work);
1606       }
1607       work = a->mult_work;
1608       for ( i=0; i<mbs; i++ ) {
1609         n     = ii[1] - ii[0]; ii++;
1610         ncols = n*bs;
1611         PetscMemzero(work,ncols*sizeof(Scalar));
1612         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1613         v += n*bs2;
1614         x += bs;
1615         workt = work;
1616         for ( j=0; j<n; j++ ) {
1617           zb = z + bs*(*idx++);
1618           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1619           workt += bs;
1620         }
1621       }
1622     }
1623   }
1624   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1625   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1626   PLogFlops(2*a->nz*a->bs2);
1627   return 0;
1628 }
1629 
1630 #undef __FUNC__
1631 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
1632 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1633 {
1634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1635 
1636   info->rows_global    = (double)a->m;
1637   info->columns_global = (double)a->n;
1638   info->rows_local     = (double)a->m;
1639   info->columns_local  = (double)a->n;
1640   info->block_size     = a->bs2;
1641   info->nz_allocated   = a->maxnz;
1642   info->nz_used        = a->bs2*a->nz;
1643   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1644   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1645     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1646   info->assemblies   = A->num_ass;
1647   info->mallocs      = a->reallocs;
1648   info->memory       = A->mem;
1649   if (A->factor) {
1650     info->fill_ratio_given  = A->info.fill_ratio_given;
1651     info->fill_ratio_needed = A->info.fill_ratio_needed;
1652     info->factor_mallocs    = A->info.factor_mallocs;
1653   } else {
1654     info->fill_ratio_given  = 0;
1655     info->fill_ratio_needed = 0;
1656     info->factor_mallocs    = 0;
1657   }
1658   return 0;
1659 }
1660 
1661 #undef __FUNC__
1662 #define __FUNC__ "MatEqual_SeqBAIJ"
1663 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1664 {
1665   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1666 
1667   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1668 
1669   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1670   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1671       (a->nz != b->nz)) {
1672     *flg = PETSC_FALSE; return 0;
1673   }
1674 
1675   /* if the a->i are the same */
1676   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1677     *flg = PETSC_FALSE; return 0;
1678   }
1679 
1680   /* if a->j are the same */
1681   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1682     *flg = PETSC_FALSE; return 0;
1683   }
1684 
1685   /* if a->a are the same */
1686   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1687     *flg = PETSC_FALSE; return 0;
1688   }
1689   *flg = PETSC_TRUE;
1690   return 0;
1691 
1692 }
1693 
1694 #undef __FUNC__
1695 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1696 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1697 {
1698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1699   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1700   Scalar      *x, zero = 0.0,*aa,*aa_j;
1701 
1702   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1703   bs   = a->bs;
1704   aa   = a->a;
1705   ai   = a->i;
1706   aj   = a->j;
1707   ambs = a->mbs;
1708   bs2  = a->bs2;
1709 
1710   VecSet(&zero,v);
1711   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1712   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1713   for ( i=0; i<ambs; i++ ) {
1714     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1715       if (aj[j] == i) {
1716         row  = i*bs;
1717         aa_j = aa+j*bs2;
1718         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1719         break;
1720       }
1721     }
1722   }
1723   return 0;
1724 }
1725 
1726 #undef __FUNC__
1727 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1728 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1729 {
1730   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1731   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1732   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1733 
1734   ai  = a->i;
1735   aj  = a->j;
1736   aa  = a->a;
1737   m   = a->m;
1738   n   = a->n;
1739   bs  = a->bs;
1740   mbs = a->mbs;
1741   bs2 = a->bs2;
1742   if (ll) {
1743     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1744     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1745     for ( i=0; i<mbs; i++ ) { /* for each block row */
1746       M  = ai[i+1] - ai[i];
1747       li = l + i*bs;
1748       v  = aa + bs2*ai[i];
1749       for ( j=0; j<M; j++ ) { /* for each block */
1750         for ( k=0; k<bs2; k++ ) {
1751           (*v++) *= li[k%bs];
1752         }
1753       }
1754     }
1755   }
1756 
1757   if (rr) {
1758     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1759     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1760     for ( i=0; i<mbs; i++ ) { /* for each block row */
1761       M  = ai[i+1] - ai[i];
1762       v  = aa + bs2*ai[i];
1763       for ( j=0; j<M; j++ ) { /* for each block */
1764         ri = r + bs*aj[ai[i]+j];
1765         for ( k=0; k<bs; k++ ) {
1766           x = ri[k];
1767           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1768         }
1769       }
1770     }
1771   }
1772   return 0;
1773 }
1774 
1775 
1776 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1777 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1778 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1779 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1780 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1781 
1782 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1783 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1784 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1785 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1786 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1787 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1788 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1789 
1790 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1791 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1792 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1793 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1794 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1795 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1796 
1797 #undef __FUNC__
1798 #define __FUNC__ "MatNorm_SeqBAIJ"
1799 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1800 {
1801   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1802   Scalar      *v = a->a;
1803   double      sum = 0.0;
1804   int         i,nz=a->nz,bs2=a->bs2;
1805 
1806   if (type == NORM_FROBENIUS) {
1807     for (i=0; i< bs2*nz; i++ ) {
1808 #if defined(PETSC_COMPLEX)
1809       sum += real(conj(*v)*(*v)); v++;
1810 #else
1811       sum += (*v)*(*v); v++;
1812 #endif
1813     }
1814     *norm = sqrt(sum);
1815   }
1816   else {
1817     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1818   }
1819   return 0;
1820 }
1821 
1822 /*
1823      note: This can only work for identity for row and col. It would
1824    be good to check this and otherwise generate an error.
1825 */
1826 #undef __FUNC__
1827 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1828 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1829 {
1830   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1831   Mat         outA;
1832   int         ierr;
1833 
1834   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1835 
1836   outA          = inA;
1837   inA->factor   = FACTOR_LU;
1838   a->row        = row;
1839   a->col        = col;
1840 
1841   if (!a->solve_work) {
1842     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1843     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1844   }
1845 
1846   if (!a->diag) {
1847     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1848   }
1849   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1850   return 0;
1851 }
1852 
1853 #undef __FUNC__
1854 #define __FUNC__ "MatScale_SeqBAIJ"
1855 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1856 {
1857   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1858   int         one = 1, totalnz = a->bs2*a->nz;
1859   BLscal_( &totalnz, alpha, a->a, &one );
1860   PLogFlops(totalnz);
1861   return 0;
1862 }
1863 
1864 #undef __FUNC__
1865 #define __FUNC__ "MatGetValues_SeqBAIJ"
1866 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1867 {
1868   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1869   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1870   int        *ai = a->i, *ailen = a->ilen;
1871   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1872   Scalar     *ap, *aa = a->a, zero = 0.0;
1873 
1874   for ( k=0; k<m; k++ ) { /* loop over rows */
1875     row  = im[k]; brow = row/bs;
1876     if (row < 0) SETERRQ(1,0,"Negative row");
1877     if (row >= a->m) SETERRQ(1,0,"Row too large");
1878     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1879     nrow = ailen[brow];
1880     for ( l=0; l<n; l++ ) { /* loop over columns */
1881       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1882       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1883       col  = in[l] ;
1884       bcol = col/bs;
1885       cidx = col%bs;
1886       ridx = row%bs;
1887       high = nrow;
1888       low  = 0; /* assume unsorted */
1889       while (high-low > 5) {
1890         t = (low+high)/2;
1891         if (rp[t] > bcol) high = t;
1892         else             low  = t;
1893       }
1894       for ( i=low; i<high; i++ ) {
1895         if (rp[i] > bcol) break;
1896         if (rp[i] == bcol) {
1897           *v++ = ap[bs2*i+bs*cidx+ridx];
1898           goto finished;
1899         }
1900       }
1901       *v++ = zero;
1902       finished:;
1903     }
1904   }
1905   return 0;
1906 }
1907 
1908 #undef __FUNC__
1909 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
1910 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1911 {
1912   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1913   *bs = baij->bs;
1914   return 0;
1915 }
1916 
1917 /* idx should be of length atlease bs */
1918 #undef __FUNC__
1919 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1920 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1921 {
1922   int i,row;
1923   row = idx[0];
1924   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1925 
1926   for ( i=1; i<bs; i++ ) {
1927     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1928   }
1929   *flg = PETSC_TRUE;
1930   return 0;
1931 }
1932 
1933 #undef __FUNC__
1934 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1935 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1936 {
1937   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1938   IS          is_local;
1939   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1940   PetscTruth  flg;
1941   Scalar      zero = 0.0,*aa;
1942 
1943   /* Make a copy of the IS and  sort it */
1944   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1945   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1946   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1947   ierr = ISSort(is_local); CHKERRQ(ierr);
1948   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1949 
1950   i = 0;
1951   while (i < is_n) {
1952     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1953     flg = PETSC_FALSE;
1954     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1955     if (flg) { /* There exists a block of rows to be Zerowed */
1956       baij->ilen[rows[i]/bs] = 0;
1957       i += bs;
1958     } else { /* Zero out only the requested row */
1959       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1960       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1961       for ( j=0; j<count; j++ ) {
1962         aa[0] = zero;
1963         aa+=bs;
1964       }
1965       i++;
1966     }
1967   }
1968   if (diag) {
1969     for ( j=0; j<is_n; j++ ) {
1970       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1971     }
1972   }
1973   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1974   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1975   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1976   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1977 
1978   return 0;
1979 }
1980 
1981 #undef __FUNC__
1982 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
1983 int MatPrintHelp_SeqBAIJ(Mat A)
1984 {
1985   static int called = 0;
1986   MPI_Comm   comm = A->comm;
1987 
1988   if (called) return 0; else called = 1;
1989   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1990   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1991   return 0;
1992 }
1993 
1994 /* -------------------------------------------------------------------*/
1995 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1996        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1997        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1998        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1999        MatSolve_SeqBAIJ_N,0,
2000        0,0,
2001        MatLUFactor_SeqBAIJ,0,
2002        0,
2003        MatTranspose_SeqBAIJ,
2004        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2005        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2006        0,MatAssemblyEnd_SeqBAIJ,
2007        0,
2008        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2009        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2010        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2011        MatILUFactorSymbolic_SeqBAIJ,0,
2012        0,0,
2013        MatConvertSameType_SeqBAIJ,0,0,
2014        MatILUFactor_SeqBAIJ,0,0,
2015        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2016        MatGetValues_SeqBAIJ,0,
2017        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2018        0,0,0,MatGetBlockSize_SeqBAIJ,
2019        MatGetRowIJ_SeqBAIJ,
2020        MatRestoreRowIJ_SeqBAIJ,
2021        0,0,0,0,0,0,
2022        MatSetValuesBlocked_SeqBAIJ};
2023 
2024 #undef __FUNC__
2025 #define __FUNC__ "MatCreateSeqBAIJ"
2026 /*@C
2027    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2028    compressed row) format.  For good matrix assembly performance the
2029    user should preallocate the matrix storage by setting the parameter nz
2030    (or the array nzz).  By setting these parameters accurately, performance
2031    during matrix assembly can be increased by more than a factor of 50.
2032 
2033    Input Parameters:
2034 .  comm - MPI communicator, set to PETSC_COMM_SELF
2035 .  bs - size of block
2036 .  m - number of rows
2037 .  n - number of columns
2038 .  nz - number of block nonzeros per block row (same for all rows)
2039 .  nzz - array containing the number of block nonzeros in the various block rows
2040          (possibly different for each block row) or PETSC_NULL
2041 
2042    Output Parameter:
2043 .  A - the matrix
2044 
2045    Options Database Keys:
2046 $    -mat_no_unroll - uses code that does not unroll the loops in the
2047 $                     block calculations (much solver)
2048 $    -mat_block_size - size of the blocks to use
2049 
2050    Notes:
2051    The block AIJ format is fully compatible with standard Fortran 77
2052    storage.  That is, the stored row and column indices can begin at
2053    either one (as in Fortran) or zero.  See the users' manual for details.
2054 
2055    Specify the preallocated storage with either nz or nnz (not both).
2056    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2057    allocation.  For additional details, see the users manual chapter on
2058    matrices.
2059 
2060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2061 @*/
2062 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2063 {
2064   Mat         B;
2065   Mat_SeqBAIJ *b;
2066   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2067 
2068   MPI_Comm_size(comm,&size);
2069   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2070 
2071   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2072 
2073   if (mbs*bs!=m || nbs*bs!=n)
2074     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
2075 
2076   *A = 0;
2077   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
2078   PLogObjectCreate(B);
2079   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2080   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2081   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2082   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2083   if (!flg) {
2084     switch (bs) {
2085     case 1:
2086       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2087       B->ops.solve           = MatSolve_SeqBAIJ_1;
2088       B->ops.mult            = MatMult_SeqBAIJ_1;
2089       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2090       break;
2091     case 2:
2092       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2093       B->ops.solve           = MatSolve_SeqBAIJ_2;
2094       B->ops.mult            = MatMult_SeqBAIJ_2;
2095       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2096       break;
2097     case 3:
2098       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2099       B->ops.solve           = MatSolve_SeqBAIJ_3;
2100       B->ops.mult            = MatMult_SeqBAIJ_3;
2101       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2102       break;
2103     case 4:
2104       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2105       B->ops.solve           = MatSolve_SeqBAIJ_4;
2106       B->ops.mult            = MatMult_SeqBAIJ_4;
2107       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2108       break;
2109     case 5:
2110       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2111       B->ops.solve           = MatSolve_SeqBAIJ_5;
2112       B->ops.mult            = MatMult_SeqBAIJ_5;
2113       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2114       break;
2115     case 7:
2116       B->ops.mult            = MatMult_SeqBAIJ_7;
2117       B->ops.solve           = MatSolve_SeqBAIJ_7;
2118       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2119       break;
2120     }
2121   }
2122   B->destroy          = MatDestroy_SeqBAIJ;
2123   B->view             = MatView_SeqBAIJ;
2124   B->factor           = 0;
2125   B->lupivotthreshold = 1.0;
2126   B->mapping          = 0;
2127   b->row              = 0;
2128   b->col              = 0;
2129   b->reallocs         = 0;
2130 
2131   b->m       = m; B->m = m; B->M = m;
2132   b->n       = n; B->n = n; B->N = n;
2133   b->mbs     = mbs;
2134   b->nbs     = nbs;
2135   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2136   if (nnz == PETSC_NULL) {
2137     if (nz == PETSC_DEFAULT) nz = 5;
2138     else if (nz <= 0)        nz = 1;
2139     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2140     nz = nz*mbs;
2141   }
2142   else {
2143     nz = 0;
2144     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2145   }
2146 
2147   /* allocate the matrix space */
2148   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2149   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2150   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2151   b->j  = (int *) (b->a + nz*bs2);
2152   PetscMemzero(b->j,nz*sizeof(int));
2153   b->i  = b->j + nz;
2154   b->singlemalloc = 1;
2155 
2156   b->i[0] = 0;
2157   for (i=1; i<mbs+1; i++) {
2158     b->i[i] = b->i[i-1] + b->imax[i-1];
2159   }
2160 
2161   /* b->ilen will count nonzeros in each block row so far. */
2162   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2163   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2164   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2165 
2166   b->bs               = bs;
2167   b->bs2              = bs2;
2168   b->mbs              = mbs;
2169   b->nz               = 0;
2170   b->maxnz            = nz*bs2;
2171   b->sorted           = 0;
2172   b->roworiented      = 1;
2173   b->nonew            = 0;
2174   b->diag             = 0;
2175   b->solve_work       = 0;
2176   b->mult_work        = 0;
2177   b->spptr            = 0;
2178   B->info.nz_unneeded = (double)b->maxnz;
2179 
2180   *A = B;
2181   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2182   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2183   return 0;
2184 }
2185 
2186 #undef __FUNC__
2187 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2188 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2189 {
2190   Mat         C;
2191   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2192   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2193 
2194   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2195 
2196   *B = 0;
2197   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
2198   PLogObjectCreate(C);
2199   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2200   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2201   C->destroy    = MatDestroy_SeqBAIJ;
2202   C->view       = MatView_SeqBAIJ;
2203   C->factor     = A->factor;
2204   c->row        = 0;
2205   c->col        = 0;
2206   C->assembled  = PETSC_TRUE;
2207 
2208   c->m = C->m   = a->m;
2209   c->n = C->n   = a->n;
2210   C->M          = a->m;
2211   C->N          = a->n;
2212 
2213   c->bs         = a->bs;
2214   c->bs2        = a->bs2;
2215   c->mbs        = a->mbs;
2216   c->nbs        = a->nbs;
2217 
2218   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2219   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2220   for ( i=0; i<mbs; i++ ) {
2221     c->imax[i] = a->imax[i];
2222     c->ilen[i] = a->ilen[i];
2223   }
2224 
2225   /* allocate the matrix space */
2226   c->singlemalloc = 1;
2227   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2228   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2229   c->j  = (int *) (c->a + nz*bs2);
2230   c->i  = c->j + nz;
2231   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2232   if (mbs > 0) {
2233     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2234     if (cpvalues == COPY_VALUES) {
2235       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2236     }
2237   }
2238 
2239   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2240   c->sorted      = a->sorted;
2241   c->roworiented = a->roworiented;
2242   c->nonew       = a->nonew;
2243 
2244   if (a->diag) {
2245     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2246     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2247     for ( i=0; i<mbs; i++ ) {
2248       c->diag[i] = a->diag[i];
2249     }
2250   }
2251   else c->diag          = 0;
2252   c->nz                 = a->nz;
2253   c->maxnz              = a->maxnz;
2254   c->solve_work         = 0;
2255   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2256   c->mult_work          = 0;
2257   *B = C;
2258   return 0;
2259 }
2260 
2261 #undef __FUNC__
2262 #define __FUNC__ "MatLoad_SeqBAIJ"
2263 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2264 {
2265   Mat_SeqBAIJ  *a;
2266   Mat          B;
2267   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2268   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2269   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2270   int          *masked, nmask,tmp,bs2,ishift;
2271   Scalar       *aa;
2272   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2273 
2274   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2275   bs2  = bs*bs;
2276 
2277   MPI_Comm_size(comm,&size);
2278   if (size > 1) SETERRQ(1,0,"view must have one processor");
2279   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2280   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2281   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2282   M = header[1]; N = header[2]; nz = header[3];
2283 
2284   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2285 
2286   /*
2287      This code adds extra rows to make sure the number of rows is
2288     divisible by the blocksize
2289   */
2290   mbs        = M/bs;
2291   extra_rows = bs - M + bs*(mbs);
2292   if (extra_rows == bs) extra_rows = 0;
2293   else                  mbs++;
2294   if (extra_rows) {
2295     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2296   }
2297 
2298   /* read in row lengths */
2299   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2300   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2301   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2302 
2303   /* read in column indices */
2304   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2305   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2306   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2307 
2308   /* loop over row lengths determining block row lengths */
2309   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2310   PetscMemzero(browlengths,mbs*sizeof(int));
2311   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2312   PetscMemzero(mask,mbs*sizeof(int));
2313   masked = mask + mbs;
2314   rowcount = 0; nzcount = 0;
2315   for ( i=0; i<mbs; i++ ) {
2316     nmask = 0;
2317     for ( j=0; j<bs; j++ ) {
2318       kmax = rowlengths[rowcount];
2319       for ( k=0; k<kmax; k++ ) {
2320         tmp = jj[nzcount++]/bs;
2321         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2322       }
2323       rowcount++;
2324     }
2325     browlengths[i] += nmask;
2326     /* zero out the mask elements we set */
2327     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2328   }
2329 
2330   /* create our matrix */
2331   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2332          CHKERRQ(ierr);
2333   B = *A;
2334   a = (Mat_SeqBAIJ *) B->data;
2335 
2336   /* set matrix "i" values */
2337   a->i[0] = 0;
2338   for ( i=1; i<= mbs; i++ ) {
2339     a->i[i]      = a->i[i-1] + browlengths[i-1];
2340     a->ilen[i-1] = browlengths[i-1];
2341   }
2342   a->nz         = 0;
2343   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2344 
2345   /* read in nonzero values */
2346   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2347   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2348   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2349 
2350   /* set "a" and "j" values into matrix */
2351   nzcount = 0; jcount = 0;
2352   for ( i=0; i<mbs; i++ ) {
2353     nzcountb = nzcount;
2354     nmask    = 0;
2355     for ( j=0; j<bs; j++ ) {
2356       kmax = rowlengths[i*bs+j];
2357       for ( k=0; k<kmax; k++ ) {
2358         tmp = jj[nzcount++]/bs;
2359 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2360       }
2361       rowcount++;
2362     }
2363     /* sort the masked values */
2364     PetscSortInt(nmask,masked);
2365 
2366     /* set "j" values into matrix */
2367     maskcount = 1;
2368     for ( j=0; j<nmask; j++ ) {
2369       a->j[jcount++]  = masked[j];
2370       mask[masked[j]] = maskcount++;
2371     }
2372     /* set "a" values into matrix */
2373     ishift = bs2*a->i[i];
2374     for ( j=0; j<bs; j++ ) {
2375       kmax = rowlengths[i*bs+j];
2376       for ( k=0; k<kmax; k++ ) {
2377         tmp    = jj[nzcountb]/bs ;
2378         block  = mask[tmp] - 1;
2379         point  = jj[nzcountb] - bs*tmp;
2380         idx    = ishift + bs2*block + j + bs*point;
2381         a->a[idx] = aa[nzcountb++];
2382       }
2383     }
2384     /* zero out the mask elements we set */
2385     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2386   }
2387   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2388 
2389   PetscFree(rowlengths);
2390   PetscFree(browlengths);
2391   PetscFree(aa);
2392   PetscFree(jj);
2393   PetscFree(mask);
2394 
2395   B->assembled = PETSC_TRUE;
2396 
2397   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2398   if (flg) {
2399     Viewer tviewer;
2400     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2401     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2402     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2403     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2404   }
2405   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2406   if (flg) {
2407     Viewer tviewer;
2408     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2409     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2410     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2411     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2412   }
2413   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2414   if (flg) {
2415     Viewer tviewer;
2416     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2417     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2418     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2419   }
2420   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2421   if (flg) {
2422     Viewer tviewer;
2423     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2424     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2425     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2426     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2427   }
2428   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2429   if (flg) {
2430     Viewer tviewer;
2431     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2432     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2433     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2434     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2435   }
2436   return 0;
2437 }
2438 
2439 
2440 
2441