xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1de57adc8a1ca54985b5e97fb2fb4832b3ccb382)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.102 1997/05/23 18:38:09 balay Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 
10 #include "pinclude/pviewer.h"
11 #include "sys.h"
12 #include "src/mat/impls/baij/seq/baij.h"
13 #include "src/vec/vecimpl.h"
14 #include "src/inline/spops.h"
15 #include "petsc.h"
16 
17 
18 /*
19      Adds diagonal pointers to sparse matrix structure.
20 */
21 
22 #undef __FUNC__
23 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24 int MatMarkDiag_SeqBAIJ(Mat A)
25 {
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
27   int         i,j, *diag, m = a->mbs;
28 
29   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30   PLogObjectMemory(A,(m+1)*sizeof(int));
31   for ( i=0; i<m; i++ ) {
32     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33       if (a->j[j] == i) {
34         diag[i] = j;
35         break;
36       }
37     }
38   }
39   a->diag = diag;
40   return 0;
41 }
42 
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert1;
418         }
419       }
420       if (nonew == 1) goto noinsert1;
421       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
422       if (nrow >= rmax) {
423         /* there is no extra room in row, therefore enlarge */
424         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
425         Scalar *new_a;
426 
427         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
428 
429         /* Malloc new storage space */
430         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
431         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
432         new_j   = (int *) (new_a + bs2*new_nz);
433         new_i   = new_j + new_nz;
434 
435         /* copy over old data into new slots */
436         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
437         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
438         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
439         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
440         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
441                                                            len*sizeof(int));
442         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
443         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
444         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
445                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
446         /* free up old matrix storage */
447         PetscFree(a->a);
448         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
449         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
450         a->singlemalloc = 1;
451 
452         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
453         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
454         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
455         a->maxnz += bs2*CHUNKSIZE;
456         a->reallocs++;
457         a->nz++;
458       }
459       N = nrow++ - 1;
460       /* shift up all the later entries in this row */
461       for ( ii=N; ii>=i; ii-- ) {
462         rp[ii+1] = rp[ii];
463         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
464       }
465       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
466       rp[i]                      = bcol;
467       ap[bs2*i + bs*cidx + ridx] = value;
468       noinsert1:;
469       low = i;
470     }
471     ailen[brow] = nrow;
472   }
473   return 0;
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
482   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
483   int         roworiented=a->roworiented;
484   int         *aj=a->j,nonew=a->nonew;
485   int          bs2=a->bs2,bs=a->bs,stepval;
486   Scalar      *ap,*value=v,*aa=a->a,*bap;
487 
488   if (roworiented) {
489     stepval = (n-1)*bs;
490   } else {
491     stepval = (m-1)*bs;
492   }
493   for ( k=0; k<m; k++ ) { /* loop over added rows */
494     row  = im[k];
495 #if defined(PETSC_BOPT_g)
496     if (row < 0) SETERRQ(1,0,"Negative row");
497     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
498 #endif
499     rp   = aj + ai[row];
500     ap   = aa + bs2*ai[row];
501     rmax = imax[row];
502     nrow = ailen[row];
503     low  = 0;
504     for ( l=0; l<n; l++ ) { /* loop over added columns */
505 #if defined(PETSC_BOPT_g)
506       if (in[l] < 0) SETERRQ(1,0,"Negative column");
507       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
508 #endif
509       col = in[l];
510       if (roworiented) {
511         value = v + k*(stepval+bs)*bs + l*bs;
512       } else {
513         value = v + l*(stepval+bs)*bs + k*bs;
514       }
515       if (!sorted) low = 0; high = nrow;
516       while (high-low > 7) {
517         t = (low+high)/2;
518         if (rp[t] > col) high = t;
519         else             low  = t;
520       }
521       for ( i=low; i<high; i++ ) {
522         if (rp[i] > col) break;
523         if (rp[i] == col) {
524           bap  = ap +  bs2*i;
525           if (roworiented) {
526             if (is == ADD_VALUES) {
527               for ( ii=0; ii<bs; ii++,value+=stepval )
528                 for (jj=ii; jj<bs2; jj+=bs )
529                   bap[jj] += *value++;
530             } else {
531               for ( ii=0; ii<bs; ii++,value+=stepval )
532                 for (jj=ii; jj<bs2; jj+=bs )
533                   bap[jj] = *value++;
534             }
535           } else {
536             if (is == ADD_VALUES) {
537               for ( ii=0; ii<bs; ii++,value+=stepval )
538                 for (jj=0; jj<bs; jj++ )
539                   *bap++ += *value++;
540             } else {
541               for ( ii=0; ii<bs; ii++,value+=stepval )
542                 for (jj=0; jj<bs; jj++ )
543                   *bap++  = *value++;
544             }
545           }
546           goto noinsert2;
547         }
548       }
549       if (nonew == 1) goto noinsert2;
550       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
551       if (nrow >= rmax) {
552         /* there is no extra room in row, therefore enlarge */
553         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
554         Scalar *new_a;
555 
556         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
557 
558         /* malloc new storage space */
559         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
560         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
561         new_j   = (int *) (new_a + bs2*new_nz);
562         new_i   = new_j + new_nz;
563 
564         /* copy over old data into new slots */
565         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
566         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
567         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
568         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
569         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
570                                                            len*sizeof(int));
571         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
572         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
573         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
574                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
575         /* free up old matrix storage */
576         PetscFree(a->a);
577         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
578         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
579         a->singlemalloc = 1;
580 
581         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
582         rmax = imax[row] = imax[row] + CHUNKSIZE;
583         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
584         a->maxnz += bs2*CHUNKSIZE;
585         a->reallocs++;
586         a->nz++;
587       }
588       N = nrow++ - 1;
589       /* shift up all the later entries in this row */
590       for ( ii=N; ii>=i; ii-- ) {
591         rp[ii+1] = rp[ii];
592         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
593       }
594       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
595       rp[i] = col;
596       bap   = ap +  bs2*i;
597       if (roworiented) {
598         for ( ii=0; ii<bs; ii++,value+=stepval)
599           for (jj=ii; jj<bs2; jj+=bs )
600             bap[jj] = *value++;
601       } else {
602         for ( ii=0; ii<bs; ii++,value+=stepval )
603           for (jj=0; jj<bs; jj++ )
604             *bap++  = *value++;
605       }
606       noinsert2:;
607       low = i;
608     }
609     ailen[row] = nrow;
610   }
611   return 0;
612 }
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
616 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
617 {
618   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
619   *m = a->m; *n = a->n;
620   return 0;
621 }
622 
623 #undef __FUNC__
624 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
625 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
626 {
627   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
628   *m = 0; *n = a->m;
629   return 0;
630 }
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatGetRow_SeqBAIJ"
634 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
635 {
636   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
637   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
638   Scalar      *aa,*v_i,*aa_i;
639 
640   bs  = a->bs;
641   ai  = a->i;
642   aj  = a->j;
643   aa  = a->a;
644   bs2 = a->bs2;
645 
646   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
647 
648   bn  = row/bs;   /* Block number */
649   bp  = row % bs; /* Block Position */
650   M   = ai[bn+1] - ai[bn];
651   *nz = bs*M;
652 
653   if (v) {
654     *v = 0;
655     if (*nz) {
656       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
657       for ( i=0; i<M; i++ ) { /* for each block in the block row */
658         v_i  = *v + i*bs;
659         aa_i = aa + bs2*(ai[bn] + i);
660         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
661       }
662     }
663   }
664 
665   if (idx) {
666     *idx = 0;
667     if (*nz) {
668       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
669       for ( i=0; i<M; i++ ) { /* for each block in the block row */
670         idx_i = *idx + i*bs;
671         itmp  = bs*aj[ai[bn] + i];
672         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
673       }
674     }
675   }
676   return 0;
677 }
678 
679 #undef __FUNC__
680 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
681 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
682 {
683   if (idx) {if (*idx) PetscFree(*idx);}
684   if (v)   {if (*v)   PetscFree(*v);}
685   return 0;
686 }
687 
688 #undef __FUNC__
689 #define __FUNC__ "MatTranspose_SeqBAIJ"
690 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
691 {
692   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
693   Mat         C;
694   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
695   int         *rows,*cols,bs2=a->bs2;
696   Scalar      *array=a->a;
697 
698   if (B==PETSC_NULL && mbs!=nbs)
699     SETERRQ(1,0,"Square matrix only for in-place");
700   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
701   PetscMemzero(col,(1+nbs)*sizeof(int));
702 
703   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
704   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
705   PetscFree(col);
706   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
707   cols = rows + bs;
708   for ( i=0; i<mbs; i++ ) {
709     cols[0] = i*bs;
710     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
711     len = ai[i+1] - ai[i];
712     for ( j=0; j<len; j++ ) {
713       rows[0] = (*aj++)*bs;
714       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
715       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
716       array += bs2;
717     }
718   }
719   PetscFree(rows);
720 
721   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
722   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
723 
724   if (B != PETSC_NULL) {
725     *B = C;
726   } else {
727     /* This isn't really an in-place transpose */
728     PetscFree(a->a);
729     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
730     if (a->diag) PetscFree(a->diag);
731     if (a->ilen) PetscFree(a->ilen);
732     if (a->imax) PetscFree(a->imax);
733     if (a->solve_work) PetscFree(a->solve_work);
734     PetscFree(a);
735     PetscMemcpy(A,C,sizeof(struct _p_Mat));
736     PetscHeaderDestroy(C);
737   }
738   return 0;
739 }
740 
741 
742 #undef __FUNC__
743 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
744 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
745 {
746   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
747   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
748   int        m = a->m,*ip, N, *ailen = a->ilen;
749   int        mbs = a->mbs, bs2 = a->bs2,rmax;
750   Scalar     *aa = a->a, *ap;
751 
752   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
753 
754   rmax = ailen[0];
755   for ( i=1; i<mbs; i++ ) {
756     /* move each row back by the amount of empty slots (fshift) before it*/
757     fshift += imax[i-1] - ailen[i-1];
758     rmax   = PetscMax(rmax,ailen[i]);
759     if (fshift) {
760       ip = aj + ai[i]; ap = aa + bs2*ai[i];
761       N = ailen[i];
762       for ( j=0; j<N; j++ ) {
763         ip[j-fshift] = ip[j];
764         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
765       }
766     }
767     ai[i] = ai[i-1] + ailen[i-1];
768   }
769   if (mbs) {
770     fshift += imax[mbs-1] - ailen[mbs-1];
771     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
772   }
773   /* reset ilen and imax for each row */
774   for ( i=0; i<mbs; i++ ) {
775     ailen[i] = imax[i] = ai[i+1] - ai[i];
776   }
777   a->nz = ai[mbs];
778 
779   /* diagonals may have moved, so kill the diagonal pointers */
780   if (fshift && a->diag) {
781     PetscFree(a->diag);
782     PLogObjectMemory(A,-(m+1)*sizeof(int));
783     a->diag = 0;
784   }
785   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
786            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
787   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
788            a->reallocs);
789   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
790   a->reallocs          = 0;
791   A->info.nz_unneeded  = (double)fshift*bs2;
792 
793   return 0;
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
798 int MatZeroEntries_SeqBAIJ(Mat A)
799 {
800   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
801   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
802   return 0;
803 }
804 
805 #undef __FUNC__
806 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
807 int MatDestroy_SeqBAIJ(PetscObject obj)
808 {
809   Mat         A  = (Mat) obj;
810   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
811   int         ierr;
812 
813 #if defined(PETSC_LOG)
814   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
815 #endif
816   PetscFree(a->a);
817   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
818   if (a->diag) PetscFree(a->diag);
819   if (a->ilen) PetscFree(a->ilen);
820   if (a->imax) PetscFree(a->imax);
821   if (a->solve_work) PetscFree(a->solve_work);
822   if (a->mult_work) PetscFree(a->mult_work);
823   PetscFree(a);
824   if (A->mapping) {
825     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
826   }
827   PLogObjectDestroy(A);
828   PetscHeaderDestroy(A);
829   return 0;
830 }
831 
832 #undef __FUNC__
833 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
834 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
835 {
836   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
837   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
838   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
839   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
840   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
841   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
842   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
843   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
844   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
845   else if (op == MAT_ROWS_SORTED ||
846            op == MAT_ROWS_UNSORTED ||
847            op == MAT_SYMMETRIC ||
848            op == MAT_STRUCTURALLY_SYMMETRIC ||
849            op == MAT_YES_NEW_DIAGONALS ||
850            op == MAT_IGNORE_OFF_PROC_ENTRIES)
851     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
852   else if (op == MAT_NO_NEW_DIAGONALS)
853     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
854   else
855     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
856   return 0;
857 }
858 
859 
860 /* -------------------------------------------------------*/
861 /* Should check that shapes of vectors and matrices match */
862 /* -------------------------------------------------------*/
863 #include "pinclude/plapack.h"
864 
865 #undef __FUNC__
866 #define __FUNC__ "MatMult_SeqBAIJ_1"
867 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
868 {
869   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
870   register Scalar *x,*z,*v,sum;
871   int             mbs=a->mbs,i,*idx,*ii,n;
872 
873   VecGetArray_Fast(xx,x);
874   VecGetArray_Fast(zz,z);
875 
876   idx   = a->j;
877   v     = a->a;
878   ii    = a->i;
879 
880   for ( i=0; i<mbs; i++ ) {
881     n    = ii[1] - ii[0]; ii++;
882     sum  = 0.0;
883     while (n--) sum += *v++ * x[*idx++];
884     z[i] = sum;
885   }
886   VecRestoreArray_Fast(xx,x);
887   VecRestoreArray_Fast(zz,z);
888   PLogFlops(2*a->nz - a->m);
889   return 0;
890 }
891 
892 #undef __FUNC__
893 #define __FUNC__ "MatMult_SeqBAIJ_2"
894 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
895 {
896   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
897   register Scalar *x,*z,*v,*xb,sum1,sum2;
898   register Scalar x1,x2;
899   int             mbs=a->mbs,i,*idx,*ii;
900   int             j,n;
901 
902   VecGetArray_Fast(xx,x);
903   VecGetArray_Fast(zz,z);
904 
905   idx   = a->j;
906   v     = a->a;
907   ii    = a->i;
908 
909   for ( i=0; i<mbs; i++ ) {
910     n  = ii[1] - ii[0]; ii++;
911     sum1 = 0.0; sum2 = 0.0;
912     for ( j=0; j<n; j++ ) {
913       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
914       sum1 += v[0]*x1 + v[2]*x2;
915       sum2 += v[1]*x1 + v[3]*x2;
916       v += 4;
917     }
918     z[0] = sum1; z[1] = sum2;
919     z += 2;
920   }
921   VecRestoreArray_Fast(xx,x);
922   VecRestoreArray_Fast(zz,z);
923   PLogFlops(4*a->nz - a->m);
924   return 0;
925 }
926 
927 #undef __FUNC__
928 #define __FUNC__ "MatMult_SeqBAIJ_3"
929 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
930 {
931   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
932   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
933   int             mbs=a->mbs,i,*idx,*ii,j,n;
934 
935   VecGetArray_Fast(xx,x);
936   VecGetArray_Fast(zz,z);
937 
938   idx   = a->j;
939   v     = a->a;
940   ii    = a->i;
941 
942   for ( i=0; i<mbs; i++ ) {
943     n  = ii[1] - ii[0]; ii++;
944     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
945     for ( j=0; j<n; j++ ) {
946       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
947       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
948       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
949       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
950       v += 9;
951     }
952     z[0] = sum1; z[1] = sum2; z[2] = sum3;
953     z += 3;
954   }
955   VecRestoreArray_Fast(xx,x);
956   VecRestoreArray_Fast(zz,z);
957   PLogFlops(18*a->nz - a->m);
958   return 0;
959 }
960 
961 #undef __FUNC__
962 #define __FUNC__ "MatMult_SeqBAIJ_4"
963 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
964 {
965   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
966   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
967   register Scalar x1,x2,x3,x4;
968   int             mbs=a->mbs,i,*idx,*ii;
969   int             j,n;
970 
971   VecGetArray_Fast(xx,x);
972   VecGetArray_Fast(zz,z);
973 
974   idx   = a->j;
975   v     = a->a;
976   ii    = a->i;
977 
978   for ( i=0; i<mbs; i++ ) {
979     n  = ii[1] - ii[0]; ii++;
980     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
981     for ( j=0; j<n; j++ ) {
982       xb = x + 4*(*idx++);
983       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
984       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
985       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
986       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
987       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
988       v += 16;
989     }
990     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
991     z += 4;
992   }
993   VecRestoreArray_Fast(xx,x);
994   VecRestoreArray_Fast(zz,z);
995   PLogFlops(32*a->nz - a->m);
996   return 0;
997 }
998 
999 #undef __FUNC__
1000 #define __FUNC__ "MatMult_SeqBAIJ_5"
1001 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
1002 {
1003   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1004   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1005   register Scalar x1,x2,x3,x4,x5;
1006   int             mbs=a->mbs,i,*idx,*ii,j,n;
1007 
1008   VecGetArray_Fast(xx,x);
1009   VecGetArray_Fast(zz,z);
1010 
1011   idx   = a->j;
1012   v     = a->a;
1013   ii    = a->i;
1014 
1015   for ( i=0; i<mbs; i++ ) {
1016     n  = ii[1] - ii[0]; ii++;
1017     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1018     for ( j=0; j<n; j++ ) {
1019       xb = x + 5*(*idx++);
1020       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1021       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1022       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1023       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1024       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1025       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1026       v += 25;
1027     }
1028     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1029     z += 5;
1030   }
1031   VecRestoreArray_Fast(xx,x);
1032   VecRestoreArray_Fast(zz,z);
1033   PLogFlops(50*a->nz - a->m);
1034   return 0;
1035 }
1036 
1037 #undef __FUNC__
1038 #define __FUNC__ "MatMult_SeqBAIJ_7"
1039 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1040 {
1041   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1042   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1043   register Scalar x1,x2,x3,x4,x5,x6,x7;
1044   int             mbs=a->mbs,i,*idx,*ii,j,n;
1045 
1046   VecGetArray_Fast(xx,x);
1047   VecGetArray_Fast(zz,z);
1048 
1049   idx   = a->j;
1050   v     = a->a;
1051   ii    = a->i;
1052 
1053   for ( i=0; i<mbs; i++ ) {
1054     n  = ii[1] - ii[0]; ii++;
1055     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1056     for ( j=0; j<n; j++ ) {
1057       xb = x + 7*(*idx++);
1058       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1059       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1060       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1061       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1062       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1063       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1064       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1065       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1066       v += 49;
1067     }
1068     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1069     z += 7;
1070   }
1071 
1072   VecRestoreArray_Fast(xx,x);
1073   VecRestoreArray_Fast(zz,z);
1074   PLogFlops(98*a->nz - a->m);
1075   return 0;
1076 }
1077 
1078 #undef __FUNC__
1079 #define __FUNC__ "MatMult_SeqBAIJ_N"
1080 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1081 {
1082   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1083   register Scalar *x,*z,*v,*xb;
1084   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1085   int             _One = 1,ncols,k;
1086   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1087 
1088   VecGetArray_Fast(xx,x);
1089   VecGetArray_Fast(zz,z);
1090 
1091   idx   = a->j;
1092   v     = a->a;
1093   ii    = a->i;
1094 
1095 
1096   if (!a->mult_work) {
1097     k = PetscMax(a->m,a->n);
1098     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1099   }
1100   work = a->mult_work;
1101   for ( i=0; i<mbs; i++ ) {
1102     n     = ii[1] - ii[0]; ii++;
1103     ncols = n*bs;
1104     workt = work;
1105     for ( j=0; j<n; j++ ) {
1106       xb = x + bs*(*idx++);
1107       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1108       workt += bs;
1109     }
1110     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1111     v += n*bs2;
1112     z += bs;
1113   }
1114   VecRestoreArray_Fast(xx,x);
1115   VecRestoreArray_Fast(zz,z);
1116   PLogFlops(2*a->nz*bs2 - a->m);
1117   return 0;
1118 }
1119 
1120 #undef __FUNC__
1121 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1122 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1123 {
1124   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1125   register Scalar *x,*y,*z,*v,sum;
1126   int             mbs=a->mbs,i,*idx,*ii,n;
1127 
1128   VecGetArray_Fast(xx,x);
1129   VecGetArray_Fast(yy,y);
1130   VecGetArray_Fast(zz,z);
1131 
1132   idx   = a->j;
1133   v     = a->a;
1134   ii    = a->i;
1135 
1136   for ( i=0; i<mbs; i++ ) {
1137     n    = ii[1] - ii[0]; ii++;
1138     sum  = y[i];
1139     while (n--) sum += *v++ * x[*idx++];
1140     z[i] = sum;
1141   }
1142   VecRestoreArray_Fast(xx,x);
1143   VecRestoreArray_Fast(yy,y);
1144   VecRestoreArray_Fast(zz,z);
1145   PLogFlops(2*a->nz);
1146   return 0;
1147 }
1148 
1149 #undef __FUNC__
1150 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1151 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1152 {
1153   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1154   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1155   register Scalar x1,x2;
1156   int             mbs=a->mbs,i,*idx,*ii;
1157   int             j,n;
1158 
1159   VecGetArray_Fast(xx,x);
1160   VecGetArray_Fast(yy,y);
1161   VecGetArray_Fast(zz,z);
1162 
1163   idx   = a->j;
1164   v     = a->a;
1165   ii    = a->i;
1166 
1167   for ( i=0; i<mbs; i++ ) {
1168     n  = ii[1] - ii[0]; ii++;
1169     sum1 = y[0]; sum2 = y[1];
1170     for ( j=0; j<n; j++ ) {
1171       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1172       sum1 += v[0]*x1 + v[2]*x2;
1173       sum2 += v[1]*x1 + v[3]*x2;
1174       v += 4;
1175     }
1176     z[0] = sum1; z[1] = sum2;
1177     z += 2; y += 2;
1178   }
1179   VecRestoreArray_Fast(xx,x);
1180   VecRestoreArray_Fast(yy,y);
1181   VecRestoreArray_Fast(zz,z);
1182   PLogFlops(4*a->nz);
1183   return 0;
1184 }
1185 
1186 #undef __FUNC__
1187 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1188 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1189 {
1190   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1191   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1192   int             mbs=a->mbs,i,*idx,*ii,j,n;
1193 
1194   VecGetArray_Fast(xx,x);
1195   VecGetArray_Fast(yy,y);
1196   VecGetArray_Fast(zz,z);
1197 
1198   idx   = a->j;
1199   v     = a->a;
1200   ii    = a->i;
1201 
1202   for ( i=0; i<mbs; i++ ) {
1203     n  = ii[1] - ii[0]; ii++;
1204     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1205     for ( j=0; j<n; j++ ) {
1206       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1207       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1208       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1209       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1210       v += 9;
1211     }
1212     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1213     z += 3; y += 3;
1214   }
1215   VecRestoreArray_Fast(xx,x);
1216   VecRestoreArray_Fast(yy,y);
1217   VecRestoreArray_Fast(zz,z);
1218   PLogFlops(18*a->nz);
1219   return 0;
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1224 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1225 {
1226   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1227   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1228   register Scalar x1,x2,x3,x4;
1229   int             mbs=a->mbs,i,*idx,*ii;
1230   int             j,n;
1231 
1232   VecGetArray_Fast(xx,x);
1233   VecGetArray_Fast(yy,y);
1234   VecGetArray_Fast(zz,z);
1235 
1236   idx   = a->j;
1237   v     = a->a;
1238   ii    = a->i;
1239 
1240   for ( i=0; i<mbs; i++ ) {
1241     n  = ii[1] - ii[0]; ii++;
1242     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1243     for ( j=0; j<n; j++ ) {
1244       xb = x + 4*(*idx++);
1245       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1246       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1247       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1248       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1249       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1250       v += 16;
1251     }
1252     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1253     z += 4; y += 4;
1254   }
1255   VecRestoreArray_Fast(xx,x);
1256   VecRestoreArray_Fast(yy,y);
1257   VecRestoreArray_Fast(zz,z);
1258   PLogFlops(32*a->nz);
1259   return 0;
1260 }
1261 
1262 #undef __FUNC__
1263 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1264 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1265 {
1266   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1267   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1268   register Scalar x1,x2,x3,x4,x5;
1269   int             mbs=a->mbs,i,*idx,*ii,j,n;
1270 
1271   VecGetArray_Fast(xx,x);
1272   VecGetArray_Fast(yy,y);
1273   VecGetArray_Fast(zz,z);
1274 
1275   idx   = a->j;
1276   v     = a->a;
1277   ii    = a->i;
1278 
1279   for ( i=0; i<mbs; i++ ) {
1280     n  = ii[1] - ii[0]; ii++;
1281     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1282     for ( j=0; j<n; j++ ) {
1283       xb = x + 5*(*idx++);
1284       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1285       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1286       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1287       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1288       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1289       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1290       v += 25;
1291     }
1292     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1293     z += 5; y += 5;
1294   }
1295   VecRestoreArray_Fast(xx,x);
1296   VecRestoreArray_Fast(yy,y);
1297   VecRestoreArray_Fast(zz,z);
1298   PLogFlops(50*a->nz);
1299   return 0;
1300 }
1301 
1302 #undef __FUNC__
1303 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1304 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1305 {
1306   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1307   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1308   register Scalar x1,x2,x3,x4,x5,x6,x7;
1309   int             mbs=a->mbs,i,*idx,*ii,j,n;
1310 
1311   VecGetArray_Fast(xx,x);
1312   VecGetArray_Fast(yy,y);
1313   VecGetArray_Fast(zz,z);
1314 
1315   idx   = a->j;
1316   v     = a->a;
1317   ii    = a->i;
1318 
1319   for ( i=0; i<mbs; i++ ) {
1320     n  = ii[1] - ii[0]; ii++;
1321     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1322     for ( j=0; j<n; j++ ) {
1323       xb = x + 7*(*idx++);
1324       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1325       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1326       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1327       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1328       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1329       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1330       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1331       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1332       v += 49;
1333     }
1334     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1335     z += 7; y += 7;
1336   }
1337   VecRestoreArray_Fast(xx,x);
1338   VecRestoreArray_Fast(yy,y);
1339   VecRestoreArray_Fast(zz,z);
1340   PLogFlops(98*a->nz);
1341   return 0;
1342 }
1343 
1344 
1345 #undef __FUNC__
1346 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1347 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1348 {
1349   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1350   register Scalar *x,*z,*v,*xb;
1351   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1352   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1353 
1354   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1355 
1356   VecGetArray_Fast(xx,x);
1357   VecGetArray_Fast(zz,z);
1358 
1359   idx   = a->j;
1360   v     = a->a;
1361   ii    = a->i;
1362 
1363 
1364   if (!a->mult_work) {
1365     k = PetscMax(a->m,a->n);
1366     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1367   }
1368   work = a->mult_work;
1369   for ( i=0; i<mbs; i++ ) {
1370     n     = ii[1] - ii[0]; ii++;
1371     ncols = n*bs;
1372     workt = work;
1373     for ( j=0; j<n; j++ ) {
1374       xb = x + bs*(*idx++);
1375       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1376       workt += bs;
1377     }
1378     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1379     v += n*bs2;
1380     z += bs;
1381   }
1382   VecRestoreArray_Fast(xx,x);
1383   VecRestoreArray_Fast(zz,z);
1384   PLogFlops(2*a->nz*bs2 );
1385   return 0;
1386 }
1387 
1388 #undef __FUNC__
1389 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1390 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1391 {
1392   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1393   Scalar          *xg,*zg,*zb;
1394   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1395   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1396   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1397 
1398 
1399   VecGetArray_Fast(xx,xg); x = xg;
1400   VecGetArray_Fast(zz,zg); z = zg;
1401   PetscMemzero(z,N*sizeof(Scalar));
1402 
1403   idx   = a->j;
1404   v     = a->a;
1405   ii    = a->i;
1406 
1407   switch (bs) {
1408   case 1:
1409     for ( i=0; i<mbs; i++ ) {
1410       n  = ii[1] - ii[0]; ii++;
1411       xb = x + i; x1 = xb[0];
1412       ib = idx + ai[i];
1413       for ( j=0; j<n; j++ ) {
1414         rval    = ib[j];
1415         z[rval] += *v++ * x1;
1416       }
1417     }
1418     break;
1419   case 2:
1420     for ( i=0; i<mbs; i++ ) {
1421       n  = ii[1] - ii[0]; ii++;
1422       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1423       ib = idx + ai[i];
1424       for ( j=0; j<n; j++ ) {
1425         rval      = ib[j]*2;
1426         z[rval++] += v[0]*x1 + v[1]*x2;
1427         z[rval++] += v[2]*x1 + v[3]*x2;
1428         v += 4;
1429       }
1430     }
1431     break;
1432   case 3:
1433     for ( i=0; i<mbs; i++ ) {
1434       n  = ii[1] - ii[0]; ii++;
1435       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1436       ib = idx + ai[i];
1437       for ( j=0; j<n; j++ ) {
1438         rval      = ib[j]*3;
1439         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1440         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1441         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1442         v += 9;
1443       }
1444     }
1445     break;
1446   case 4:
1447     for ( i=0; i<mbs; i++ ) {
1448       n  = ii[1] - ii[0]; ii++;
1449       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1450       ib = idx + ai[i];
1451       for ( j=0; j<n; j++ ) {
1452         rval      = ib[j]*4;
1453         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1454         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1455         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1456         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1457         v += 16;
1458       }
1459     }
1460     break;
1461   case 5:
1462     for ( i=0; i<mbs; i++ ) {
1463       n  = ii[1] - ii[0]; ii++;
1464       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1465       x4 = xb[3];   x5 = xb[4];
1466       ib = idx + ai[i];
1467       for ( j=0; j<n; j++ ) {
1468         rval      = ib[j]*5;
1469         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1470         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1471         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1472         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1473         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1474         v += 25;
1475       }
1476     }
1477     break;
1478       /* block sizes larger then 5 by 5 are handled by BLAS */
1479     default: {
1480       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1481       if (!a->mult_work) {
1482         k = PetscMax(a->m,a->n);
1483         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1484         CHKPTRQ(a->mult_work);
1485       }
1486       work = a->mult_work;
1487       for ( i=0; i<mbs; i++ ) {
1488         n     = ii[1] - ii[0]; ii++;
1489         ncols = n*bs;
1490         PetscMemzero(work,ncols*sizeof(Scalar));
1491         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1492         v += n*bs2;
1493         x += bs;
1494         workt = work;
1495         for ( j=0; j<n; j++ ) {
1496           zb = z + bs*(*idx++);
1497           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1498           workt += bs;
1499         }
1500       }
1501     }
1502   }
1503   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1504   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1505   PLogFlops(2*a->nz*a->bs2 - a->n);
1506   return 0;
1507 }
1508 
1509 #undef __FUNC__
1510 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1511 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1512 {
1513   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1514   Scalar          *xg,*zg,*zb;
1515   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1516   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1517   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1518 
1519 
1520 
1521   VecGetArray_Fast(xx,xg); x = xg;
1522   VecGetArray_Fast(zz,zg); z = zg;
1523 
1524   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1525   else PetscMemzero(z,N*sizeof(Scalar));
1526 
1527 
1528   idx   = a->j;
1529   v     = a->a;
1530   ii    = a->i;
1531 
1532   switch (bs) {
1533   case 1:
1534     for ( i=0; i<mbs; i++ ) {
1535       n  = ii[1] - ii[0]; ii++;
1536       xb = x + i; x1 = xb[0];
1537       ib = idx + ai[i];
1538       for ( j=0; j<n; j++ ) {
1539         rval    = ib[j];
1540         z[rval] += *v++ * x1;
1541       }
1542     }
1543     break;
1544   case 2:
1545     for ( i=0; i<mbs; i++ ) {
1546       n  = ii[1] - ii[0]; ii++;
1547       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1548       ib = idx + ai[i];
1549       for ( j=0; j<n; j++ ) {
1550         rval      = ib[j]*2;
1551         z[rval++] += v[0]*x1 + v[1]*x2;
1552         z[rval++] += v[2]*x1 + v[3]*x2;
1553         v += 4;
1554       }
1555     }
1556     break;
1557   case 3:
1558     for ( i=0; i<mbs; i++ ) {
1559       n  = ii[1] - ii[0]; ii++;
1560       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1561       ib = idx + ai[i];
1562       for ( j=0; j<n; j++ ) {
1563         rval      = ib[j]*3;
1564         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1565         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1566         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1567         v += 9;
1568       }
1569     }
1570     break;
1571   case 4:
1572     for ( i=0; i<mbs; i++ ) {
1573       n  = ii[1] - ii[0]; ii++;
1574       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1575       ib = idx + ai[i];
1576       for ( j=0; j<n; j++ ) {
1577         rval      = ib[j]*4;
1578         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1579         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1580         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1581         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1582         v += 16;
1583       }
1584     }
1585     break;
1586   case 5:
1587     for ( i=0; i<mbs; i++ ) {
1588       n  = ii[1] - ii[0]; ii++;
1589       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1590       x4 = xb[3];   x5 = xb[4];
1591       ib = idx + ai[i];
1592       for ( j=0; j<n; j++ ) {
1593         rval      = ib[j]*5;
1594         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1595         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1596         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1597         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1598         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1599         v += 25;
1600       }
1601     }
1602     break;
1603       /* block sizes larger then 5 by 5 are handled by BLAS */
1604     default: {
1605       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1606       if (!a->mult_work) {
1607         k = PetscMax(a->m,a->n);
1608         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1609         CHKPTRQ(a->mult_work);
1610       }
1611       work = a->mult_work;
1612       for ( i=0; i<mbs; i++ ) {
1613         n     = ii[1] - ii[0]; ii++;
1614         ncols = n*bs;
1615         PetscMemzero(work,ncols*sizeof(Scalar));
1616         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1617         v += n*bs2;
1618         x += bs;
1619         workt = work;
1620         for ( j=0; j<n; j++ ) {
1621           zb = z + bs*(*idx++);
1622           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1623           workt += bs;
1624         }
1625       }
1626     }
1627   }
1628   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1629   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1630   PLogFlops(2*a->nz*a->bs2);
1631   return 0;
1632 }
1633 
1634 #undef __FUNC__
1635 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
1636 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1637 {
1638   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1639 
1640   info->rows_global    = (double)a->m;
1641   info->columns_global = (double)a->n;
1642   info->rows_local     = (double)a->m;
1643   info->columns_local  = (double)a->n;
1644   info->block_size     = a->bs2;
1645   info->nz_allocated   = a->maxnz;
1646   info->nz_used        = a->bs2*a->nz;
1647   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1648   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1649     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1650   info->assemblies   = A->num_ass;
1651   info->mallocs      = a->reallocs;
1652   info->memory       = A->mem;
1653   if (A->factor) {
1654     info->fill_ratio_given  = A->info.fill_ratio_given;
1655     info->fill_ratio_needed = A->info.fill_ratio_needed;
1656     info->factor_mallocs    = A->info.factor_mallocs;
1657   } else {
1658     info->fill_ratio_given  = 0;
1659     info->fill_ratio_needed = 0;
1660     info->factor_mallocs    = 0;
1661   }
1662   return 0;
1663 }
1664 
1665 #undef __FUNC__
1666 #define __FUNC__ "MatEqual_SeqBAIJ"
1667 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1668 {
1669   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1670 
1671   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1672 
1673   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1674   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1675       (a->nz != b->nz)) {
1676     *flg = PETSC_FALSE; return 0;
1677   }
1678 
1679   /* if the a->i are the same */
1680   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1681     *flg = PETSC_FALSE; return 0;
1682   }
1683 
1684   /* if a->j are the same */
1685   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1686     *flg = PETSC_FALSE; return 0;
1687   }
1688 
1689   /* if a->a are the same */
1690   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1691     *flg = PETSC_FALSE; return 0;
1692   }
1693   *flg = PETSC_TRUE;
1694   return 0;
1695 
1696 }
1697 
1698 #undef __FUNC__
1699 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1700 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1701 {
1702   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1703   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1704   Scalar      *x, zero = 0.0,*aa,*aa_j;
1705 
1706   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1707   bs   = a->bs;
1708   aa   = a->a;
1709   ai   = a->i;
1710   aj   = a->j;
1711   ambs = a->mbs;
1712   bs2  = a->bs2;
1713 
1714   VecSet(&zero,v);
1715   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1716   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1717   for ( i=0; i<ambs; i++ ) {
1718     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1719       if (aj[j] == i) {
1720         row  = i*bs;
1721         aa_j = aa+j*bs2;
1722         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1723         break;
1724       }
1725     }
1726   }
1727   return 0;
1728 }
1729 
1730 #undef __FUNC__
1731 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1732 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1733 {
1734   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1735   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1736   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1737 
1738   ai  = a->i;
1739   aj  = a->j;
1740   aa  = a->a;
1741   m   = a->m;
1742   n   = a->n;
1743   bs  = a->bs;
1744   mbs = a->mbs;
1745   bs2 = a->bs2;
1746   if (ll) {
1747     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1748     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1749     for ( i=0; i<mbs; i++ ) { /* for each block row */
1750       M  = ai[i+1] - ai[i];
1751       li = l + i*bs;
1752       v  = aa + bs2*ai[i];
1753       for ( j=0; j<M; j++ ) { /* for each block */
1754         for ( k=0; k<bs2; k++ ) {
1755           (*v++) *= li[k%bs];
1756         }
1757       }
1758     }
1759   }
1760 
1761   if (rr) {
1762     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1763     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1764     for ( i=0; i<mbs; i++ ) { /* for each block row */
1765       M  = ai[i+1] - ai[i];
1766       v  = aa + bs2*ai[i];
1767       for ( j=0; j<M; j++ ) { /* for each block */
1768         ri = r + bs*aj[ai[i]+j];
1769         for ( k=0; k<bs; k++ ) {
1770           x = ri[k];
1771           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1772         }
1773       }
1774     }
1775   }
1776   return 0;
1777 }
1778 
1779 
1780 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1781 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1782 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1783 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1784 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1785 
1786 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1787 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1788 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1789 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1790 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1791 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1792 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1793 
1794 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1795 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1796 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1797 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1798 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1799 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1800 
1801 #undef __FUNC__
1802 #define __FUNC__ "MatNorm_SeqBAIJ"
1803 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1804 {
1805   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1806   Scalar      *v = a->a;
1807   double      sum = 0.0;
1808   int         i,nz=a->nz,bs2=a->bs2;
1809 
1810   if (type == NORM_FROBENIUS) {
1811     for (i=0; i< bs2*nz; i++ ) {
1812 #if defined(PETSC_COMPLEX)
1813       sum += real(conj(*v)*(*v)); v++;
1814 #else
1815       sum += (*v)*(*v); v++;
1816 #endif
1817     }
1818     *norm = sqrt(sum);
1819   }
1820   else {
1821     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1822   }
1823   return 0;
1824 }
1825 
1826 /*
1827      note: This can only work for identity for row and col. It would
1828    be good to check this and otherwise generate an error.
1829 */
1830 #undef __FUNC__
1831 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1832 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1833 {
1834   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1835   Mat         outA;
1836   int         ierr;
1837 
1838   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1839 
1840   outA          = inA;
1841   inA->factor   = FACTOR_LU;
1842   a->row        = row;
1843   a->col        = col;
1844 
1845   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1846 
1847   if (!a->diag) {
1848     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1849   }
1850   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1851   return 0;
1852 }
1853 
1854 #undef __FUNC__
1855 #define __FUNC__ "MatScale_SeqBAIJ"
1856 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1857 {
1858   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1859   int         one = 1, totalnz = a->bs2*a->nz;
1860   BLscal_( &totalnz, alpha, a->a, &one );
1861   PLogFlops(totalnz);
1862   return 0;
1863 }
1864 
1865 #undef __FUNC__
1866 #define __FUNC__ "MatGetValues_SeqBAIJ"
1867 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1868 {
1869   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1870   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1871   int        *ai = a->i, *ailen = a->ilen;
1872   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1873   Scalar     *ap, *aa = a->a, zero = 0.0;
1874 
1875   for ( k=0; k<m; k++ ) { /* loop over rows */
1876     row  = im[k]; brow = row/bs;
1877     if (row < 0) SETERRQ(1,0,"Negative row");
1878     if (row >= a->m) SETERRQ(1,0,"Row too large");
1879     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1880     nrow = ailen[brow];
1881     for ( l=0; l<n; l++ ) { /* loop over columns */
1882       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1883       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1884       col  = in[l] ;
1885       bcol = col/bs;
1886       cidx = col%bs;
1887       ridx = row%bs;
1888       high = nrow;
1889       low  = 0; /* assume unsorted */
1890       while (high-low > 5) {
1891         t = (low+high)/2;
1892         if (rp[t] > bcol) high = t;
1893         else             low  = t;
1894       }
1895       for ( i=low; i<high; i++ ) {
1896         if (rp[i] > bcol) break;
1897         if (rp[i] == bcol) {
1898           *v++ = ap[bs2*i+bs*cidx+ridx];
1899           goto finished;
1900         }
1901       }
1902       *v++ = zero;
1903       finished:;
1904     }
1905   }
1906   return 0;
1907 }
1908 
1909 #undef __FUNC__
1910 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
1911 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1912 {
1913   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1914   *bs = baij->bs;
1915   return 0;
1916 }
1917 
1918 /* idx should be of length atlease bs */
1919 #undef __FUNC__
1920 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1921 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1922 {
1923   int i,row;
1924   row = idx[0];
1925   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1926 
1927   for ( i=1; i<bs; i++ ) {
1928     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1929   }
1930   *flg = PETSC_TRUE;
1931   return 0;
1932 }
1933 
1934 #undef __FUNC__
1935 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1936 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1937 {
1938   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1939   IS          is_local;
1940   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1941   PetscTruth  flg;
1942   Scalar      zero = 0.0,*aa;
1943 
1944   /* Make a copy of the IS and  sort it */
1945   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1946   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1947   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1948   ierr = ISSort(is_local); CHKERRQ(ierr);
1949   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1950 
1951   i = 0;
1952   while (i < is_n) {
1953     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1954     flg = PETSC_FALSE;
1955     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1956     if (flg) { /* There exists a block of rows to be Zerowed */
1957       baij->ilen[rows[i]/bs] = 0;
1958       i += bs;
1959     } else { /* Zero out only the requested row */
1960       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1961       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1962       for ( j=0; j<count; j++ ) {
1963         aa[0] = zero;
1964         aa+=bs;
1965       }
1966       i++;
1967     }
1968   }
1969   if (diag) {
1970     for ( j=0; j<is_n; j++ ) {
1971       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1972     }
1973   }
1974   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1975   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1976   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1977   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1978 
1979   return 0;
1980 }
1981 
1982 #undef __FUNC__
1983 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
1984 int MatPrintHelp_SeqBAIJ(Mat A)
1985 {
1986   static int called = 0;
1987   MPI_Comm   comm = A->comm;
1988 
1989   if (called) return 0; else called = 1;
1990   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1991   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1992   return 0;
1993 }
1994 
1995 /* -------------------------------------------------------------------*/
1996 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1997        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1998        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1999        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
2000        MatSolve_SeqBAIJ_N,0,
2001        0,0,
2002        MatLUFactor_SeqBAIJ,0,
2003        0,
2004        MatTranspose_SeqBAIJ,
2005        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2006        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2007        0,MatAssemblyEnd_SeqBAIJ,
2008        0,
2009        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2010        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2011        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2012        MatILUFactorSymbolic_SeqBAIJ,0,
2013        0,0,
2014        MatConvertSameType_SeqBAIJ,0,0,
2015        MatILUFactor_SeqBAIJ,0,0,
2016        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2017        MatGetValues_SeqBAIJ,0,
2018        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2019        0,0,0,MatGetBlockSize_SeqBAIJ,
2020        MatGetRowIJ_SeqBAIJ,
2021        MatRestoreRowIJ_SeqBAIJ,
2022        0,0,0,0,0,0,
2023        MatSetValuesBlocked_SeqBAIJ};
2024 
2025 #undef __FUNC__
2026 #define __FUNC__ "MatCreateSeqBAIJ"
2027 /*@C
2028    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2029    compressed row) format.  For good matrix assembly performance the
2030    user should preallocate the matrix storage by setting the parameter nz
2031    (or the array nzz).  By setting these parameters accurately, performance
2032    during matrix assembly can be increased by more than a factor of 50.
2033 
2034    Input Parameters:
2035 .  comm - MPI communicator, set to PETSC_COMM_SELF
2036 .  bs - size of block
2037 .  m - number of rows
2038 .  n - number of columns
2039 .  nz - number of block nonzeros per block row (same for all rows)
2040 .  nzz - array containing the number of block nonzeros in the various block rows
2041          (possibly different for each block row) or PETSC_NULL
2042 
2043    Output Parameter:
2044 .  A - the matrix
2045 
2046    Options Database Keys:
2047 $    -mat_no_unroll - uses code that does not unroll the loops in the
2048 $                     block calculations (much solver)
2049 $    -mat_block_size - size of the blocks to use
2050 
2051    Notes:
2052    The block AIJ format is fully compatible with standard Fortran 77
2053    storage.  That is, the stored row and column indices can begin at
2054    either one (as in Fortran) or zero.  See the users' manual for details.
2055 
2056    Specify the preallocated storage with either nz or nnz (not both).
2057    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2058    allocation.  For additional details, see the users manual chapter on
2059    matrices.
2060 
2061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2062 @*/
2063 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2064 {
2065   Mat         B;
2066   Mat_SeqBAIJ *b;
2067   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2068 
2069   MPI_Comm_size(comm,&size);
2070   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2071 
2072   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2073 
2074   if (mbs*bs!=m || nbs*bs!=n)
2075     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
2076 
2077   *A = 0;
2078   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
2079   PLogObjectCreate(B);
2080   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2081   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2082   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2083   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2084   if (!flg) {
2085     switch (bs) {
2086     case 1:
2087       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2088       B->ops.solve           = MatSolve_SeqBAIJ_1;
2089       B->ops.mult            = MatMult_SeqBAIJ_1;
2090       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2091       break;
2092     case 2:
2093       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2094       B->ops.solve           = MatSolve_SeqBAIJ_2;
2095       B->ops.mult            = MatMult_SeqBAIJ_2;
2096       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2097       break;
2098     case 3:
2099       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2100       B->ops.solve           = MatSolve_SeqBAIJ_3;
2101       B->ops.mult            = MatMult_SeqBAIJ_3;
2102       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2103       break;
2104     case 4:
2105       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2106       B->ops.solve           = MatSolve_SeqBAIJ_4;
2107       B->ops.mult            = MatMult_SeqBAIJ_4;
2108       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2109       break;
2110     case 5:
2111       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2112       B->ops.solve           = MatSolve_SeqBAIJ_5;
2113       B->ops.mult            = MatMult_SeqBAIJ_5;
2114       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2115       break;
2116     case 7:
2117       B->ops.mult            = MatMult_SeqBAIJ_7;
2118       B->ops.solve           = MatSolve_SeqBAIJ_7;
2119       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2120       break;
2121     }
2122   }
2123   B->destroy          = MatDestroy_SeqBAIJ;
2124   B->view             = MatView_SeqBAIJ;
2125   B->factor           = 0;
2126   B->lupivotthreshold = 1.0;
2127   B->mapping          = 0;
2128   b->row              = 0;
2129   b->col              = 0;
2130   b->reallocs         = 0;
2131 
2132   b->m       = m; B->m = m; B->M = m;
2133   b->n       = n; B->n = n; B->N = n;
2134   b->mbs     = mbs;
2135   b->nbs     = nbs;
2136   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2137   if (nnz == PETSC_NULL) {
2138     if (nz == PETSC_DEFAULT) nz = 5;
2139     else if (nz <= 0)        nz = 1;
2140     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2141     nz = nz*mbs;
2142   }
2143   else {
2144     nz = 0;
2145     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2146   }
2147 
2148   /* allocate the matrix space */
2149   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2150   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2151   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2152   b->j  = (int *) (b->a + nz*bs2);
2153   PetscMemzero(b->j,nz*sizeof(int));
2154   b->i  = b->j + nz;
2155   b->singlemalloc = 1;
2156 
2157   b->i[0] = 0;
2158   for (i=1; i<mbs+1; i++) {
2159     b->i[i] = b->i[i-1] + b->imax[i-1];
2160   }
2161 
2162   /* b->ilen will count nonzeros in each block row so far. */
2163   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2164   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2165   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2166 
2167   b->bs               = bs;
2168   b->bs2              = bs2;
2169   b->mbs              = mbs;
2170   b->nz               = 0;
2171   b->maxnz            = nz*bs2;
2172   b->sorted           = 0;
2173   b->roworiented      = 1;
2174   b->nonew            = 0;
2175   b->diag             = 0;
2176   b->solve_work       = 0;
2177   b->mult_work        = 0;
2178   b->spptr            = 0;
2179   B->info.nz_unneeded = (double)b->maxnz;
2180 
2181   *A = B;
2182   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2183   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2184   return 0;
2185 }
2186 
2187 #undef __FUNC__
2188 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2189 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2190 {
2191   Mat         C;
2192   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2193   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2194 
2195   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2196 
2197   *B = 0;
2198   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
2199   PLogObjectCreate(C);
2200   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2201   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2202   C->destroy    = MatDestroy_SeqBAIJ;
2203   C->view       = MatView_SeqBAIJ;
2204   C->factor     = A->factor;
2205   c->row        = 0;
2206   c->col        = 0;
2207   C->assembled  = PETSC_TRUE;
2208 
2209   c->m = C->m   = a->m;
2210   c->n = C->n   = a->n;
2211   C->M          = a->m;
2212   C->N          = a->n;
2213 
2214   c->bs         = a->bs;
2215   c->bs2        = a->bs2;
2216   c->mbs        = a->mbs;
2217   c->nbs        = a->nbs;
2218 
2219   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2220   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2221   for ( i=0; i<mbs; i++ ) {
2222     c->imax[i] = a->imax[i];
2223     c->ilen[i] = a->ilen[i];
2224   }
2225 
2226   /* allocate the matrix space */
2227   c->singlemalloc = 1;
2228   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2229   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2230   c->j  = (int *) (c->a + nz*bs2);
2231   c->i  = c->j + nz;
2232   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2233   if (mbs > 0) {
2234     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2235     if (cpvalues == COPY_VALUES) {
2236       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2237     }
2238   }
2239 
2240   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2241   c->sorted      = a->sorted;
2242   c->roworiented = a->roworiented;
2243   c->nonew       = a->nonew;
2244 
2245   if (a->diag) {
2246     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2247     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2248     for ( i=0; i<mbs; i++ ) {
2249       c->diag[i] = a->diag[i];
2250     }
2251   }
2252   else c->diag          = 0;
2253   c->nz                 = a->nz;
2254   c->maxnz              = a->maxnz;
2255   c->solve_work         = 0;
2256   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2257   c->mult_work          = 0;
2258   *B = C;
2259   return 0;
2260 }
2261 
2262 #undef __FUNC__
2263 #define __FUNC__ "MatLoad_SeqBAIJ"
2264 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2265 {
2266   Mat_SeqBAIJ  *a;
2267   Mat          B;
2268   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2269   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2270   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2271   int          *masked, nmask,tmp,bs2,ishift;
2272   Scalar       *aa;
2273   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2274 
2275   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2276   bs2  = bs*bs;
2277 
2278   MPI_Comm_size(comm,&size);
2279   if (size > 1) SETERRQ(1,0,"view must have one processor");
2280   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2281   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2282   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2283   M = header[1]; N = header[2]; nz = header[3];
2284 
2285   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2286 
2287   /*
2288      This code adds extra rows to make sure the number of rows is
2289     divisible by the blocksize
2290   */
2291   mbs        = M/bs;
2292   extra_rows = bs - M + bs*(mbs);
2293   if (extra_rows == bs) extra_rows = 0;
2294   else                  mbs++;
2295   if (extra_rows) {
2296     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2297   }
2298 
2299   /* read in row lengths */
2300   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2301   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2302   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2303 
2304   /* read in column indices */
2305   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2306   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2307   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2308 
2309   /* loop over row lengths determining block row lengths */
2310   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2311   PetscMemzero(browlengths,mbs*sizeof(int));
2312   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2313   PetscMemzero(mask,mbs*sizeof(int));
2314   masked = mask + mbs;
2315   rowcount = 0; nzcount = 0;
2316   for ( i=0; i<mbs; i++ ) {
2317     nmask = 0;
2318     for ( j=0; j<bs; j++ ) {
2319       kmax = rowlengths[rowcount];
2320       for ( k=0; k<kmax; k++ ) {
2321         tmp = jj[nzcount++]/bs;
2322         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2323       }
2324       rowcount++;
2325     }
2326     browlengths[i] += nmask;
2327     /* zero out the mask elements we set */
2328     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2329   }
2330 
2331   /* create our matrix */
2332   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2333          CHKERRQ(ierr);
2334   B = *A;
2335   a = (Mat_SeqBAIJ *) B->data;
2336 
2337   /* set matrix "i" values */
2338   a->i[0] = 0;
2339   for ( i=1; i<= mbs; i++ ) {
2340     a->i[i]      = a->i[i-1] + browlengths[i-1];
2341     a->ilen[i-1] = browlengths[i-1];
2342   }
2343   a->nz         = 0;
2344   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2345 
2346   /* read in nonzero values */
2347   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2348   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2349   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2350 
2351   /* set "a" and "j" values into matrix */
2352   nzcount = 0; jcount = 0;
2353   for ( i=0; i<mbs; i++ ) {
2354     nzcountb = nzcount;
2355     nmask    = 0;
2356     for ( j=0; j<bs; j++ ) {
2357       kmax = rowlengths[i*bs+j];
2358       for ( k=0; k<kmax; k++ ) {
2359         tmp = jj[nzcount++]/bs;
2360 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2361       }
2362       rowcount++;
2363     }
2364     /* sort the masked values */
2365     PetscSortInt(nmask,masked);
2366 
2367     /* set "j" values into matrix */
2368     maskcount = 1;
2369     for ( j=0; j<nmask; j++ ) {
2370       a->j[jcount++]  = masked[j];
2371       mask[masked[j]] = maskcount++;
2372     }
2373     /* set "a" values into matrix */
2374     ishift = bs2*a->i[i];
2375     for ( j=0; j<bs; j++ ) {
2376       kmax = rowlengths[i*bs+j];
2377       for ( k=0; k<kmax; k++ ) {
2378         tmp    = jj[nzcountb]/bs ;
2379         block  = mask[tmp] - 1;
2380         point  = jj[nzcountb] - bs*tmp;
2381         idx    = ishift + bs2*block + j + bs*point;
2382         a->a[idx] = aa[nzcountb++];
2383       }
2384     }
2385     /* zero out the mask elements we set */
2386     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2387   }
2388   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2389 
2390   PetscFree(rowlengths);
2391   PetscFree(browlengths);
2392   PetscFree(aa);
2393   PetscFree(jj);
2394   PetscFree(mask);
2395 
2396   B->assembled = PETSC_TRUE;
2397 
2398   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2399   if (flg) {
2400     Viewer tviewer;
2401     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2402     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2403     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2404     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2405   }
2406   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2407   if (flg) {
2408     Viewer tviewer;
2409     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2410     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2411     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2412     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2413   }
2414   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2415   if (flg) {
2416     Viewer tviewer;
2417     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2418     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2419     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2420   }
2421   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2422   if (flg) {
2423     Viewer tviewer;
2424     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2425     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2426     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2427     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2428   }
2429   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2430   if (flg) {
2431     Viewer tviewer;
2432     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2433     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2434     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2435     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2436   }
2437   return 0;
2438 }
2439 
2440 
2441 
2442