xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 96854ed6ad2ded53123cd28a73515071866c9e4e)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.98 1997/04/10 00:03:26 bsmith Exp curfman $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 
10 #include "pinclude/pviewer.h"
11 #include "sys.h"
12 #include "src/mat/impls/baij/seq/baij.h"
13 #include "src/vec/vecimpl.h"
14 #include "src/inline/spops.h"
15 #include "petsc.h"
16 
17 
18 /*
19      Adds diagonal pointers to sparse matrix structure.
20 */
21 
22 #undef __FUNC__
23 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24 int MatMarkDiag_SeqBAIJ(Mat A)
25 {
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
27   int         i,j, *diag, m = a->mbs;
28 
29   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30   PLogObjectMemory(A,(m+1)*sizeof(int));
31   for ( i=0; i<m; i++ ) {
32     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33       if (a->j[j] == i) {
34         diag[i] = j;
35         break;
36       }
37     }
38   }
39   a->diag = diag;
40   return 0;
41 }
42 
43 
44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
45 
46 #undef __FUNC__
47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
49                             PetscTruth *done)
50 {
51   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
52   int         ierr, n = a->mbs,i;
53 
54   *nn = n;
55   if (!ia) return 0;
56   if (symmetric) {
57     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
58   } else if (oshift == 1) {
59     /* temporarily add 1 to i and j indices */
60     int nz = a->i[n] + 1;
61     for ( i=0; i<nz; i++ ) a->j[i]++;
62     for ( i=0; i<n+1; i++ ) a->i[i]++;
63     *ia = a->i; *ja = a->j;
64   } else {
65     *ia = a->i; *ja = a->j;
66   }
67 
68   return 0;
69 }
70 
71 #undef __FUNC__
72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
74                                 PetscTruth *done)
75 {
76   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77   int         i,n = a->mbs;
78 
79   if (!ia) return 0;
80   if (symmetric) {
81     PetscFree(*ia);
82     PetscFree(*ja);
83   } else if (oshift == 1) {
84     int nz = a->i[n];
85     for ( i=0; i<nz; i++ ) a->j[i]--;
86     for ( i=0; i<n+1; i++ ) a->i[i]--;
87   }
88   return 0;
89 }
90 
91 
92 #undef __FUNC__
93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
95 {
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
97   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98   Scalar      *aa;
99 
100   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
101   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
102   col_lens[0] = MAT_COOKIE;
103 
104   col_lens[1] = a->m;
105   col_lens[2] = a->n;
106   col_lens[3] = a->nz*bs2;
107 
108   /* store lengths of each row and write (including header) to file */
109   count = 0;
110   for ( i=0; i<a->mbs; i++ ) {
111     for ( j=0; j<bs; j++ ) {
112       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113     }
114   }
115   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
116   PetscFree(col_lens);
117 
118   /* store column indices (zero start index) */
119   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120   count = 0;
121   for ( i=0; i<a->mbs; i++ ) {
122     for ( j=0; j<bs; j++ ) {
123       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124         for ( l=0; l<bs; l++ ) {
125           jj[count++] = bs*a->j[k] + l;
126         }
127       }
128     }
129   }
130   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131   PetscFree(jj);
132 
133   /* store nonzero values */
134   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135   count = 0;
136   for ( i=0; i<a->mbs; i++ ) {
137     for ( j=0; j<bs; j++ ) {
138       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139         for ( l=0; l<bs; l++ ) {
140           aa[count++] = a->a[bs2*k + l*bs + j];
141         }
142       }
143     }
144   }
145   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146   PetscFree(aa);
147   return 0;
148 }
149 
150 #undef __FUNC__
151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
153 {
154   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
155   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
156   FILE        *fd;
157   char        *outputname;
158 
159   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
160   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
161   ierr = ViewerGetFormat(viewer,&format);
162   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
163     fprintf(fd,"  block size is %d\n",bs);
164   }
165   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166     SETERRQ(1,0,"Matlab format not supported");
167   }
168   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
169     for ( i=0; i<a->mbs; i++ ) {
170       for ( j=0; j<bs; j++ ) {
171         fprintf(fd,"row %d:",i*bs+j);
172         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
173           for ( l=0; l<bs; l++ ) {
174 #if defined(PETSC_COMPLEX)
175           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
176             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
177               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
179             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
180 #else
181           if (a->a[bs2*k + l*bs + j] != 0.0)
182             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
183 #endif
184           }
185         }
186         fprintf(fd,"\n");
187       }
188     }
189   }
190   else {
191     for ( i=0; i<a->mbs; i++ ) {
192       for ( j=0; j<bs; j++ ) {
193         fprintf(fd,"row %d:",i*bs+j);
194         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195           for ( l=0; l<bs; l++ ) {
196 #if defined(PETSC_COMPLEX)
197           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
198             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
199               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
200           }
201           else {
202             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
203           }
204 #else
205             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
206 #endif
207           }
208         }
209         fprintf(fd,"\n");
210       }
211     }
212   }
213   fflush(fd);
214   return 0;
215 }
216 
217 #undef __FUNC__
218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220 {
221   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224   Scalar       *aa;
225   Draw         draw;
226   DrawButton   button;
227   PetscTruth   isnull;
228 
229   ViewerDrawGetDraw(viewer,&draw);
230   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231 
232   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233   xr += w;    yr += h;  xl = -w;     yl = -h;
234   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235   /* loop over matrix elements drawing boxes */
236   color = DRAW_BLUE;
237   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241       aa = a->a + j*bs2;
242       for ( k=0; k<bs; k++ ) {
243         for ( l=0; l<bs; l++ ) {
244           if (PetscReal(*aa++) >=  0.) continue;
245           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
246         }
247       }
248     }
249   }
250   color = DRAW_CYAN;
251   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255       aa = a->a + j*bs2;
256       for ( k=0; k<bs; k++ ) {
257         for ( l=0; l<bs; l++ ) {
258           if (PetscReal(*aa++) != 0.) continue;
259           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
260         }
261       }
262     }
263   }
264 
265   color = DRAW_RED;
266   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270       aa = a->a + j*bs2;
271       for ( k=0; k<bs; k++ ) {
272         for ( l=0; l<bs; l++ ) {
273           if (PetscReal(*aa++) <= 0.) continue;
274           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
275         }
276       }
277     }
278   }
279 
280   DrawFlush(draw);
281   DrawGetPause(draw,&pause);
282   if (pause >= 0) { PetscSleep(pause); return 0;}
283 
284   /* allow the matrix to zoom or shrink */
285   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286   while (button != BUTTON_RIGHT) {
287     DrawClear(draw);
288     if (button == BUTTON_LEFT) scale = .5;
289     else if (button == BUTTON_CENTER) scale = 2.;
290     xl = scale*(xl + w - xc) + xc - w*scale;
291     xr = scale*(xr - w - xc) + xc + w*scale;
292     yl = scale*(yl + h - yc) + yc - h*scale;
293     yr = scale*(yr - h - yc) + yc + h*scale;
294     w *= scale; h *= scale;
295     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296     color = DRAW_BLUE;
297     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301         aa = a->a + j*bs2;
302         for ( k=0; k<bs; k++ ) {
303           for ( l=0; l<bs; l++ ) {
304             if (PetscReal(*aa++) >=  0.) continue;
305             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
306           }
307         }
308       }
309     }
310     color = DRAW_CYAN;
311     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315         aa = a->a + j*bs2;
316         for ( k=0; k<bs; k++ ) {
317           for ( l=0; l<bs; l++ ) {
318           if (PetscReal(*aa++) != 0.) continue;
319           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
320           }
321         }
322       }
323     }
324 
325     color = DRAW_RED;
326     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330         aa = a->a + j*bs2;
331         for ( k=0; k<bs; k++ ) {
332           for ( l=0; l<bs; l++ ) {
333             if (PetscReal(*aa++) <= 0.) continue;
334             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
335           }
336         }
337       }
338     }
339     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340   }
341   return 0;
342 }
343 
344 #undef __FUNC__
345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
347 {
348   Mat         A = (Mat) obj;
349   ViewerType  vtype;
350   int         ierr;
351 
352   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
353   if (vtype == MATLAB_VIEWER) {
354     SETERRQ(1,0,"Matlab viewer not supported");
355   }
356   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357     return MatView_SeqBAIJ_ASCII(A,viewer);
358   }
359   else if (vtype == BINARY_FILE_VIEWER) {
360     return MatView_SeqBAIJ_Binary(A,viewer);
361   }
362   else if (vtype == DRAW_VIEWER) {
363     return MatView_SeqBAIJ_Draw(A,viewer);
364   }
365   return 0;
366 }
367 
368 #define CHUNKSIZE  10
369 
370 #undef __FUNC__
371 #define __FUNC__ "MatSetValues_SeqBAIJ"
372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373 {
374   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
378   int          ridx,cidx,bs2=a->bs2;
379   Scalar      *ap,value,*aa=a->a,*bap;
380 
381   for ( k=0; k<m; k++ ) { /* loop over added rows */
382     row  = im[k]; brow = row/bs;
383 #if defined(PETSC_BOPT_g)
384     if (row < 0) SETERRQ(1,0,"Negative row");
385     if (row >= a->m) SETERRQ(1,0,"Row too large");
386 #endif
387     rp   = aj + ai[brow];
388     ap   = aa + bs2*ai[brow];
389     rmax = imax[brow];
390     nrow = ailen[brow];
391     low  = 0;
392     for ( l=0; l<n; l++ ) { /* loop over added columns */
393 #if defined(PETSC_BOPT_g)
394       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
396 #endif
397       col = in[l]; bcol = col/bs;
398       ridx = row % bs; cidx = col % bs;
399       if (roworiented) {
400         value = *v++;
401       }
402       else {
403         value = v[k + l*m];
404       }
405       if (!sorted) low = 0; high = nrow;
406       while (high-low > 7) {
407         t = (low+high)/2;
408         if (rp[t] > bcol) high = t;
409         else              low  = t;
410       }
411       for ( i=low; i<high; i++ ) {
412         if (rp[i] > bcol) break;
413         if (rp[i] == bcol) {
414           bap  = ap +  bs2*i + bs*cidx + ridx;
415           if (is == ADD_VALUES) *bap += value;
416           else                  *bap  = value;
417           goto noinsert1;
418         }
419       }
420       if (nonew == 1) goto noinsert1;
421       else if (nonew == -1) SETERRQ(1,1,"Inserting a new nonzero in the matrix");
422       if (nrow >= rmax) {
423         /* there is no extra room in row, therefore enlarge */
424         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
425         Scalar *new_a;
426 
427         if (nonew == -2) SETERRQ(1,1,"Inserting a new nonzero in the matrix");
428 
429         /* Malloc new storage space */
430         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
431         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
432         new_j   = (int *) (new_a + bs2*new_nz);
433         new_i   = new_j + new_nz;
434 
435         /* copy over old data into new slots */
436         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
437         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
438         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
439         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
440         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
441                                                            len*sizeof(int));
442         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
443         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
444         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
445                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
446         /* free up old matrix storage */
447         PetscFree(a->a);
448         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
449         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
450         a->singlemalloc = 1;
451 
452         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
453         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
454         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
455         a->maxnz += bs2*CHUNKSIZE;
456         a->reallocs++;
457         a->nz++;
458       }
459       N = nrow++ - 1;
460       /* shift up all the later entries in this row */
461       for ( ii=N; ii>=i; ii-- ) {
462         rp[ii+1] = rp[ii];
463         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
464       }
465       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
466       rp[i]                      = bcol;
467       ap[bs2*i + bs*cidx + ridx] = value;
468       noinsert1:;
469       low = i;
470     }
471     ailen[brow] = nrow;
472   }
473   return 0;
474 }
475 
476 #undef __FUNC__
477 #define __FUNC__ "MatSetValues_SeqBAIJ"
478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
479 {
480   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
481   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
482   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
483   int         roworiented=a->roworiented;
484   int         *aj=a->j,nonew=a->nonew;
485   int          bs2=a->bs2,bs=a->bs,stepval;
486   Scalar      *ap,*value=v,*aa=a->a,*bap;
487 
488   if (roworiented) {
489     stepval = (n-1)*bs;
490   } else {
491     stepval = (m-1)*bs;
492   }
493   for ( k=0; k<m; k++ ) { /* loop over added rows */
494     row  = im[k];
495 #if defined(PETSC_BOPT_g)
496     if (row < 0) SETERRQ(1,0,"Negative row");
497     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
498 #endif
499     rp   = aj + ai[row];
500     ap   = aa + bs2*ai[row];
501     rmax = imax[row];
502     nrow = ailen[row];
503     low  = 0;
504     for ( l=0; l<n; l++ ) { /* loop over added columns */
505 #if defined(PETSC_BOPT_g)
506       if (in[l] < 0) SETERRQ(1,0,"Negative column");
507       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
508 #endif
509       col = in[l];
510       if (roworiented) {
511         value = v + k*(stepval+bs)*bs + l*bs;
512       } else {
513         value = v + l*(stepval+bs)*bs + k*bs;
514       }
515       if (!sorted) low = 0; high = nrow;
516       while (high-low > 7) {
517         t = (low+high)/2;
518         if (rp[t] > col) high = t;
519         else             low  = t;
520       }
521       for ( i=low; i<high; i++ ) {
522         if (rp[i] > col) break;
523         if (rp[i] == col) {
524           bap  = ap +  bs2*i;
525           if (roworiented) {
526             if (is == ADD_VALUES) {
527               for ( ii=0; ii<bs; ii++,value+=stepval )
528                 for (jj=ii; jj<bs2; jj+=bs )
529                   bap[jj] += *value++;
530             } else {
531               for ( ii=0; ii<bs; ii++,value+=stepval )
532                 for (jj=ii; jj<bs2; jj+=bs )
533                   bap[jj] = *value++;
534             }
535           } else {
536             if (is == ADD_VALUES) {
537               for ( ii=0; ii<bs; ii++,value+=stepval )
538                 for (jj=0; jj<bs; jj++ )
539                   *bap++ += *value++;
540             } else {
541               for ( ii=0; ii<bs; ii++,value+=stepval )
542                 for (jj=0; jj<bs; jj++ )
543                   *bap++  = *value++;
544             }
545           }
546           goto noinsert2;
547         }
548       }
549       if (nonew) goto noinsert2;
550       if (nrow >= rmax) {
551         /* there is no extra room in row, therefore enlarge */
552         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
553         Scalar *new_a;
554 
555         /* malloc new storage space */
556         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
557         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
558         new_j   = (int *) (new_a + bs2*new_nz);
559         new_i   = new_j + new_nz;
560 
561         /* copy over old data into new slots */
562         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
563         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
564         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
565         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
566         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
567                                                            len*sizeof(int));
568         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
569         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
570         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
571                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
572         /* free up old matrix storage */
573         PetscFree(a->a);
574         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
575         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
576         a->singlemalloc = 1;
577 
578         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
579         rmax = imax[row] = imax[row] + CHUNKSIZE;
580         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
581         a->maxnz += bs2*CHUNKSIZE;
582         a->reallocs++;
583         a->nz++;
584       }
585       N = nrow++ - 1;
586       /* shift up all the later entries in this row */
587       for ( ii=N; ii>=i; ii-- ) {
588         rp[ii+1] = rp[ii];
589         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
590       }
591       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
592       rp[i] = col;
593       bap   = ap +  bs2*i;
594       if (roworiented) {
595         for ( ii=0; ii<bs; ii++,value+=stepval)
596           for (jj=ii; jj<bs2; jj+=bs )
597             bap[jj] = *value++;
598       } else {
599         for ( ii=0; ii<bs; ii++,value+=stepval )
600           for (jj=0; jj<bs; jj++ )
601             *bap++  = *value++;
602       }
603       noinsert2:;
604       low = i;
605     }
606     ailen[row] = nrow;
607   }
608   return 0;
609 }
610 
611 #undef __FUNC__
612 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
613 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
614 {
615   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
616   *m = a->m; *n = a->n;
617   return 0;
618 }
619 
620 #undef __FUNC__
621 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
622 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
623 {
624   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
625   *m = 0; *n = a->m;
626   return 0;
627 }
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatGetRow_SeqBAIJ"
631 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
632 {
633   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
634   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
635   Scalar      *aa,*v_i,*aa_i;
636 
637   bs  = a->bs;
638   ai  = a->i;
639   aj  = a->j;
640   aa  = a->a;
641   bs2 = a->bs2;
642 
643   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
644 
645   bn  = row/bs;   /* Block number */
646   bp  = row % bs; /* Block Position */
647   M   = ai[bn+1] - ai[bn];
648   *nz = bs*M;
649 
650   if (v) {
651     *v = 0;
652     if (*nz) {
653       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
654       for ( i=0; i<M; i++ ) { /* for each block in the block row */
655         v_i  = *v + i*bs;
656         aa_i = aa + bs2*(ai[bn] + i);
657         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
658       }
659     }
660   }
661 
662   if (idx) {
663     *idx = 0;
664     if (*nz) {
665       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
666       for ( i=0; i<M; i++ ) { /* for each block in the block row */
667         idx_i = *idx + i*bs;
668         itmp  = bs*aj[ai[bn] + i];
669         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
670       }
671     }
672   }
673   return 0;
674 }
675 
676 #undef __FUNC__
677 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
678 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
679 {
680   if (idx) {if (*idx) PetscFree(*idx);}
681   if (v)   {if (*v)   PetscFree(*v);}
682   return 0;
683 }
684 
685 #undef __FUNC__
686 #define __FUNC__ "MatTranspose_SeqBAIJ"
687 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
688 {
689   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
690   Mat         C;
691   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
692   int         *rows,*cols,bs2=a->bs2;
693   Scalar      *array=a->a;
694 
695   if (B==PETSC_NULL && mbs!=nbs)
696     SETERRQ(1,0,"Square matrix only for in-place");
697   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
698   PetscMemzero(col,(1+nbs)*sizeof(int));
699 
700   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
701   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
702   PetscFree(col);
703   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
704   cols = rows + bs;
705   for ( i=0; i<mbs; i++ ) {
706     cols[0] = i*bs;
707     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
708     len = ai[i+1] - ai[i];
709     for ( j=0; j<len; j++ ) {
710       rows[0] = (*aj++)*bs;
711       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
712       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
713       array += bs2;
714     }
715   }
716   PetscFree(rows);
717 
718   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
719   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
720 
721   if (B != PETSC_NULL) {
722     *B = C;
723   } else {
724     /* This isn't really an in-place transpose */
725     PetscFree(a->a);
726     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
727     if (a->diag) PetscFree(a->diag);
728     if (a->ilen) PetscFree(a->ilen);
729     if (a->imax) PetscFree(a->imax);
730     if (a->solve_work) PetscFree(a->solve_work);
731     PetscFree(a);
732     PetscMemcpy(A,C,sizeof(struct _Mat));
733     PetscHeaderDestroy(C);
734   }
735   return 0;
736 }
737 
738 
739 #undef __FUNC__
740 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
741 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
742 {
743   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
744   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
745   int        m = a->m,*ip, N, *ailen = a->ilen;
746   int        mbs = a->mbs, bs2 = a->bs2,rmax;
747   Scalar     *aa = a->a, *ap;
748 
749   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
750 
751   rmax = ailen[0];
752   for ( i=1; i<mbs; i++ ) {
753     /* move each row back by the amount of empty slots (fshift) before it*/
754     fshift += imax[i-1] - ailen[i-1];
755     rmax   = PetscMax(rmax,ailen[i]);
756     if (fshift) {
757       ip = aj + ai[i]; ap = aa + bs2*ai[i];
758       N = ailen[i];
759       for ( j=0; j<N; j++ ) {
760         ip[j-fshift] = ip[j];
761         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
762       }
763     }
764     ai[i] = ai[i-1] + ailen[i-1];
765   }
766   if (mbs) {
767     fshift += imax[mbs-1] - ailen[mbs-1];
768     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
769   }
770   /* reset ilen and imax for each row */
771   for ( i=0; i<mbs; i++ ) {
772     ailen[i] = imax[i] = ai[i+1] - ai[i];
773   }
774   a->nz = ai[mbs];
775 
776   /* diagonals may have moved, so kill the diagonal pointers */
777   if (fshift && a->diag) {
778     PetscFree(a->diag);
779     PLogObjectMemory(A,-(m+1)*sizeof(int));
780     a->diag = 0;
781   }
782   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
783            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
784   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
785            a->reallocs);
786   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
787   a->reallocs          = 0;
788   A->info.nz_unneeded  = (double)fshift*bs2;
789 
790   return 0;
791 }
792 
793 #undef __FUNC__
794 #define __FUNC__ "MatZeroEntries_SeqBAIJ"
795 int MatZeroEntries_SeqBAIJ(Mat A)
796 {
797   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
798   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
799   return 0;
800 }
801 
802 #undef __FUNC__
803 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
804 int MatDestroy_SeqBAIJ(PetscObject obj)
805 {
806   Mat         A  = (Mat) obj;
807   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
808   int         ierr;
809 
810 #if defined(PETSC_LOG)
811   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
812 #endif
813   PetscFree(a->a);
814   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
815   if (a->diag) PetscFree(a->diag);
816   if (a->ilen) PetscFree(a->ilen);
817   if (a->imax) PetscFree(a->imax);
818   if (a->solve_work) PetscFree(a->solve_work);
819   if (a->mult_work) PetscFree(a->mult_work);
820   PetscFree(a);
821   if (A->mapping) {
822     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
823   }
824   PLogObjectDestroy(A);
825   PetscHeaderDestroy(A);
826   return 0;
827 }
828 
829 #undef __FUNC__
830 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
831 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
832 {
833   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
834   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
835   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
836   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
837   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
838   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
839   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
840   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
841   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
842   else if (op == MAT_ROWS_SORTED ||
843            op == MAT_ROWS_UNSORTED ||
844            op == MAT_SYMMETRIC ||
845            op == MAT_STRUCTURALLY_SYMMETRIC ||
846            op == MAT_YES_NEW_DIAGONALS ||
847            op == MAT_IGNORE_OFF_PROC_ENTRIES)
848     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
849   else if (op == MAT_NO_NEW_DIAGONALS)
850     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
851   else
852     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
853   return 0;
854 }
855 
856 
857 /* -------------------------------------------------------*/
858 /* Should check that shapes of vectors and matrices match */
859 /* -------------------------------------------------------*/
860 #include "pinclude/plapack.h"
861 
862 #undef __FUNC__
863 #define __FUNC__ "MatMult_SeqBAIJ_1"
864 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
865 {
866   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
867   register Scalar *x,*z,*v,sum;
868   int             mbs=a->mbs,i,*idx,*ii,n;
869 
870   VecGetArray_Fast(xx,x);
871   VecGetArray_Fast(zz,z);
872 
873   idx   = a->j;
874   v     = a->a;
875   ii    = a->i;
876 
877   for ( i=0; i<mbs; i++ ) {
878     n    = ii[1] - ii[0]; ii++;
879     sum  = 0.0;
880     while (n--) sum += *v++ * x[*idx++];
881     z[i] = sum;
882   }
883   VecRestoreArray_Fast(xx,x);
884   VecRestoreArray_Fast(zz,z);
885   PLogFlops(2*a->nz - a->m);
886   return 0;
887 }
888 
889 #undef __FUNC__
890 #define __FUNC__ "MatMult_SeqBAIJ_2"
891 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
892 {
893   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
894   register Scalar *x,*z,*v,*xb,sum1,sum2;
895   register Scalar x1,x2;
896   int             mbs=a->mbs,i,*idx,*ii;
897   int             j,n;
898 
899   VecGetArray_Fast(xx,x);
900   VecGetArray_Fast(zz,z);
901 
902   idx   = a->j;
903   v     = a->a;
904   ii    = a->i;
905 
906   for ( i=0; i<mbs; i++ ) {
907     n  = ii[1] - ii[0]; ii++;
908     sum1 = 0.0; sum2 = 0.0;
909     for ( j=0; j<n; j++ ) {
910       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
911       sum1 += v[0]*x1 + v[2]*x2;
912       sum2 += v[1]*x1 + v[3]*x2;
913       v += 4;
914     }
915     z[0] = sum1; z[1] = sum2;
916     z += 2;
917   }
918   VecRestoreArray_Fast(xx,x);
919   VecRestoreArray_Fast(zz,z);
920   PLogFlops(4*a->nz - a->m);
921   return 0;
922 }
923 
924 #undef __FUNC__
925 #define __FUNC__ "MatMult_SeqBAIJ_3"
926 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
927 {
928   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
929   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
930   int             mbs=a->mbs,i,*idx,*ii,j,n;
931 
932   VecGetArray_Fast(xx,x);
933   VecGetArray_Fast(zz,z);
934 
935   idx   = a->j;
936   v     = a->a;
937   ii    = a->i;
938 
939   for ( i=0; i<mbs; i++ ) {
940     n  = ii[1] - ii[0]; ii++;
941     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
942     for ( j=0; j<n; j++ ) {
943       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
944       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
945       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
946       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
947       v += 9;
948     }
949     z[0] = sum1; z[1] = sum2; z[2] = sum3;
950     z += 3;
951   }
952   VecRestoreArray_Fast(xx,x);
953   VecRestoreArray_Fast(zz,z);
954   PLogFlops(18*a->nz - a->m);
955   return 0;
956 }
957 
958 #undef __FUNC__
959 #define __FUNC__ "MatMult_SeqBAIJ_4"
960 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
961 {
962   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
963   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
964   register Scalar x1,x2,x3,x4;
965   int             mbs=a->mbs,i,*idx,*ii;
966   int             j,n;
967 
968   VecGetArray_Fast(xx,x);
969   VecGetArray_Fast(zz,z);
970 
971   idx   = a->j;
972   v     = a->a;
973   ii    = a->i;
974 
975   for ( i=0; i<mbs; i++ ) {
976     n  = ii[1] - ii[0]; ii++;
977     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
978     for ( j=0; j<n; j++ ) {
979       xb = x + 4*(*idx++);
980       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
981       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
982       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
983       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
984       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
985       v += 16;
986     }
987     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
988     z += 4;
989   }
990   VecRestoreArray_Fast(xx,x);
991   VecRestoreArray_Fast(zz,z);
992   PLogFlops(32*a->nz - a->m);
993   return 0;
994 }
995 
996 #undef __FUNC__
997 #define __FUNC__ "MatMult_SeqBAIJ_5"
998 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
999 {
1000   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1001   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1002   register Scalar x1,x2,x3,x4,x5;
1003   int             mbs=a->mbs,i,*idx,*ii,j,n;
1004 
1005   VecGetArray_Fast(xx,x);
1006   VecGetArray_Fast(zz,z);
1007 
1008   idx   = a->j;
1009   v     = a->a;
1010   ii    = a->i;
1011 
1012   for ( i=0; i<mbs; i++ ) {
1013     n  = ii[1] - ii[0]; ii++;
1014     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1015     for ( j=0; j<n; j++ ) {
1016       xb = x + 5*(*idx++);
1017       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1018       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1019       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1020       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1021       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1022       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1023       v += 25;
1024     }
1025     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1026     z += 5;
1027   }
1028   VecRestoreArray_Fast(xx,x);
1029   VecRestoreArray_Fast(zz,z);
1030   PLogFlops(50*a->nz - a->m);
1031   return 0;
1032 }
1033 
1034 #undef __FUNC__
1035 #define __FUNC__ "MatMult_SeqBAIJ_7"
1036 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1037 {
1038   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1039   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1040   register Scalar x1,x2,x3,x4,x5,x6,x7;
1041   int             mbs=a->mbs,i,*idx,*ii,j,n;
1042 
1043   VecGetArray_Fast(xx,x);
1044   VecGetArray_Fast(zz,z);
1045 
1046   idx   = a->j;
1047   v     = a->a;
1048   ii    = a->i;
1049 
1050   for ( i=0; i<mbs; i++ ) {
1051     n  = ii[1] - ii[0]; ii++;
1052     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1053     for ( j=0; j<n; j++ ) {
1054       xb = x + 7*(*idx++);
1055       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1056       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1057       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1058       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1059       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1060       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1061       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1062       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1063       v += 49;
1064     }
1065     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1066     z += 7;
1067   }
1068 
1069   VecRestoreArray_Fast(xx,x);
1070   VecRestoreArray_Fast(zz,z);
1071   PLogFlops(98*a->nz - a->m);
1072   return 0;
1073 }
1074 
1075 #undef __FUNC__
1076 #define __FUNC__ "MatMult_SeqBAIJ_N"
1077 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1078 {
1079   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1080   register Scalar *x,*z,*v,*xb;
1081   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1082   int             _One = 1,ncols,k;
1083   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1084 
1085   VecGetArray_Fast(xx,x);
1086   VecGetArray_Fast(zz,z);
1087 
1088   idx   = a->j;
1089   v     = a->a;
1090   ii    = a->i;
1091 
1092 
1093   if (!a->mult_work) {
1094     k = PetscMax(a->m,a->n);
1095     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1096   }
1097   work = a->mult_work;
1098   for ( i=0; i<mbs; i++ ) {
1099     n     = ii[1] - ii[0]; ii++;
1100     ncols = n*bs;
1101     workt = work;
1102     for ( j=0; j<n; j++ ) {
1103       xb = x + bs*(*idx++);
1104       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1105       workt += bs;
1106     }
1107     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1108     v += n*bs2;
1109     z += bs;
1110   }
1111   VecRestoreArray_Fast(xx,x);
1112   VecRestoreArray_Fast(zz,z);
1113   PLogFlops(2*a->nz*bs2 - a->m);
1114   return 0;
1115 }
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1119 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1120 {
1121   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1122   register Scalar *x,*y,*z,*v,sum;
1123   int             mbs=a->mbs,i,*idx,*ii,n;
1124 
1125   VecGetArray_Fast(xx,x);
1126   VecGetArray_Fast(yy,y);
1127   VecGetArray_Fast(zz,z);
1128 
1129   idx   = a->j;
1130   v     = a->a;
1131   ii    = a->i;
1132 
1133   for ( i=0; i<mbs; i++ ) {
1134     n    = ii[1] - ii[0]; ii++;
1135     sum  = y[i];
1136     while (n--) sum += *v++ * x[*idx++];
1137     z[i] = sum;
1138   }
1139   VecRestoreArray_Fast(xx,x);
1140   VecRestoreArray_Fast(yy,y);
1141   VecRestoreArray_Fast(zz,z);
1142   PLogFlops(2*a->nz);
1143   return 0;
1144 }
1145 
1146 #undef __FUNC__
1147 #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1148 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1149 {
1150   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1151   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1152   register Scalar x1,x2;
1153   int             mbs=a->mbs,i,*idx,*ii;
1154   int             j,n;
1155 
1156   VecGetArray_Fast(xx,x);
1157   VecGetArray_Fast(yy,y);
1158   VecGetArray_Fast(zz,z);
1159 
1160   idx   = a->j;
1161   v     = a->a;
1162   ii    = a->i;
1163 
1164   for ( i=0; i<mbs; i++ ) {
1165     n  = ii[1] - ii[0]; ii++;
1166     sum1 = y[0]; sum2 = y[1];
1167     for ( j=0; j<n; j++ ) {
1168       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1169       sum1 += v[0]*x1 + v[2]*x2;
1170       sum2 += v[1]*x1 + v[3]*x2;
1171       v += 4;
1172     }
1173     z[0] = sum1; z[1] = sum2;
1174     z += 2; y += 2;
1175   }
1176   VecRestoreArray_Fast(xx,x);
1177   VecRestoreArray_Fast(yy,y);
1178   VecRestoreArray_Fast(zz,z);
1179   PLogFlops(4*a->nz);
1180   return 0;
1181 }
1182 
1183 #undef __FUNC__
1184 #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1185 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1186 {
1187   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1188   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1189   int             mbs=a->mbs,i,*idx,*ii,j,n;
1190 
1191   VecGetArray_Fast(xx,x);
1192   VecGetArray_Fast(yy,y);
1193   VecGetArray_Fast(zz,z);
1194 
1195   idx   = a->j;
1196   v     = a->a;
1197   ii    = a->i;
1198 
1199   for ( i=0; i<mbs; i++ ) {
1200     n  = ii[1] - ii[0]; ii++;
1201     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1202     for ( j=0; j<n; j++ ) {
1203       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1204       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1205       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1206       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1207       v += 9;
1208     }
1209     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1210     z += 3; y += 3;
1211   }
1212   VecRestoreArray_Fast(xx,x);
1213   VecRestoreArray_Fast(yy,y);
1214   VecRestoreArray_Fast(zz,z);
1215   PLogFlops(18*a->nz);
1216   return 0;
1217 }
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1221 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1224   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1225   register Scalar x1,x2,x3,x4;
1226   int             mbs=a->mbs,i,*idx,*ii;
1227   int             j,n;
1228 
1229   VecGetArray_Fast(xx,x);
1230   VecGetArray_Fast(yy,y);
1231   VecGetArray_Fast(zz,z);
1232 
1233   idx   = a->j;
1234   v     = a->a;
1235   ii    = a->i;
1236 
1237   for ( i=0; i<mbs; i++ ) {
1238     n  = ii[1] - ii[0]; ii++;
1239     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1240     for ( j=0; j<n; j++ ) {
1241       xb = x + 4*(*idx++);
1242       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1243       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1244       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1245       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1246       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1247       v += 16;
1248     }
1249     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1250     z += 4; y += 4;
1251   }
1252   VecRestoreArray_Fast(xx,x);
1253   VecRestoreArray_Fast(yy,y);
1254   VecRestoreArray_Fast(zz,z);
1255   PLogFlops(32*a->nz);
1256   return 0;
1257 }
1258 
1259 #undef __FUNC__
1260 #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1261 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1262 {
1263   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1264   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1265   register Scalar x1,x2,x3,x4,x5;
1266   int             mbs=a->mbs,i,*idx,*ii,j,n;
1267 
1268   VecGetArray_Fast(xx,x);
1269   VecGetArray_Fast(yy,y);
1270   VecGetArray_Fast(zz,z);
1271 
1272   idx   = a->j;
1273   v     = a->a;
1274   ii    = a->i;
1275 
1276   for ( i=0; i<mbs; i++ ) {
1277     n  = ii[1] - ii[0]; ii++;
1278     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1279     for ( j=0; j<n; j++ ) {
1280       xb = x + 5*(*idx++);
1281       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1282       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1283       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1284       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1285       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1286       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1287       v += 25;
1288     }
1289     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1290     z += 5; y += 5;
1291   }
1292   VecRestoreArray_Fast(xx,x);
1293   VecRestoreArray_Fast(yy,y);
1294   VecRestoreArray_Fast(zz,z);
1295   PLogFlops(50*a->nz);
1296   return 0;
1297 }
1298 
1299 #undef __FUNC__
1300 #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1301 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1302 {
1303   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1304   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1305   register Scalar x1,x2,x3,x4,x5,x6,x7;
1306   int             mbs=a->mbs,i,*idx,*ii,j,n;
1307 
1308   VecGetArray_Fast(xx,x);
1309   VecGetArray_Fast(yy,y);
1310   VecGetArray_Fast(zz,z);
1311 
1312   idx   = a->j;
1313   v     = a->a;
1314   ii    = a->i;
1315 
1316   for ( i=0; i<mbs; i++ ) {
1317     n  = ii[1] - ii[0]; ii++;
1318     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1319     for ( j=0; j<n; j++ ) {
1320       xb = x + 7*(*idx++);
1321       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1322       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1323       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1324       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1325       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1326       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1327       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1328       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1329       v += 49;
1330     }
1331     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1332     z += 7; y += 7;
1333   }
1334   VecRestoreArray_Fast(xx,x);
1335   VecRestoreArray_Fast(yy,y);
1336   VecRestoreArray_Fast(zz,z);
1337   PLogFlops(98*a->nz);
1338   return 0;
1339 }
1340 
1341 
1342 #undef __FUNC__
1343 #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1344 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1345 {
1346   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1347   register Scalar *x,*z,*v,*xb;
1348   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1349   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1350 
1351   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1352 
1353   VecGetArray_Fast(xx,x);
1354   VecGetArray_Fast(zz,z);
1355 
1356   idx   = a->j;
1357   v     = a->a;
1358   ii    = a->i;
1359 
1360 
1361   if (!a->mult_work) {
1362     k = PetscMax(a->m,a->n);
1363     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1364   }
1365   work = a->mult_work;
1366   for ( i=0; i<mbs; i++ ) {
1367     n     = ii[1] - ii[0]; ii++;
1368     ncols = n*bs;
1369     workt = work;
1370     for ( j=0; j<n; j++ ) {
1371       xb = x + bs*(*idx++);
1372       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1373       workt += bs;
1374     }
1375     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1376     v += n*bs2;
1377     z += bs;
1378   }
1379   VecRestoreArray_Fast(xx,x);
1380   VecRestoreArray_Fast(zz,z);
1381   PLogFlops(2*a->nz*bs2 );
1382   return 0;
1383 }
1384 
1385 #undef __FUNC__
1386 #define __FUNC__ "MatMultTrans_SeqBAIJ"
1387 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1388 {
1389   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1390   Scalar          *xg,*zg,*zb;
1391   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1392   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1393   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1394 
1395 
1396   VecGetArray_Fast(xx,xg); x = xg;
1397   VecGetArray_Fast(zz,zg); z = zg;
1398   PetscMemzero(z,N*sizeof(Scalar));
1399 
1400   idx   = a->j;
1401   v     = a->a;
1402   ii    = a->i;
1403 
1404   switch (bs) {
1405   case 1:
1406     for ( i=0; i<mbs; i++ ) {
1407       n  = ii[1] - ii[0]; ii++;
1408       xb = x + i; x1 = xb[0];
1409       ib = idx + ai[i];
1410       for ( j=0; j<n; j++ ) {
1411         rval    = ib[j];
1412         z[rval] += *v++ * x1;
1413       }
1414     }
1415     break;
1416   case 2:
1417     for ( i=0; i<mbs; i++ ) {
1418       n  = ii[1] - ii[0]; ii++;
1419       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1420       ib = idx + ai[i];
1421       for ( j=0; j<n; j++ ) {
1422         rval      = ib[j]*2;
1423         z[rval++] += v[0]*x1 + v[1]*x2;
1424         z[rval++] += v[2]*x1 + v[3]*x2;
1425         v += 4;
1426       }
1427     }
1428     break;
1429   case 3:
1430     for ( i=0; i<mbs; i++ ) {
1431       n  = ii[1] - ii[0]; ii++;
1432       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1433       ib = idx + ai[i];
1434       for ( j=0; j<n; j++ ) {
1435         rval      = ib[j]*3;
1436         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1437         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1438         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1439         v += 9;
1440       }
1441     }
1442     break;
1443   case 4:
1444     for ( i=0; i<mbs; i++ ) {
1445       n  = ii[1] - ii[0]; ii++;
1446       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1447       ib = idx + ai[i];
1448       for ( j=0; j<n; j++ ) {
1449         rval      = ib[j]*4;
1450         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1451         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1452         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1453         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1454         v += 16;
1455       }
1456     }
1457     break;
1458   case 5:
1459     for ( i=0; i<mbs; i++ ) {
1460       n  = ii[1] - ii[0]; ii++;
1461       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1462       x4 = xb[3];   x5 = xb[4];
1463       ib = idx + ai[i];
1464       for ( j=0; j<n; j++ ) {
1465         rval      = ib[j]*5;
1466         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1467         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1468         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1469         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1470         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1471         v += 25;
1472       }
1473     }
1474     break;
1475       /* block sizes larger then 5 by 5 are handled by BLAS */
1476     default: {
1477       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1478       if (!a->mult_work) {
1479         k = PetscMax(a->m,a->n);
1480         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1481         CHKPTRQ(a->mult_work);
1482       }
1483       work = a->mult_work;
1484       for ( i=0; i<mbs; i++ ) {
1485         n     = ii[1] - ii[0]; ii++;
1486         ncols = n*bs;
1487         PetscMemzero(work,ncols*sizeof(Scalar));
1488         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1489         v += n*bs2;
1490         x += bs;
1491         workt = work;
1492         for ( j=0; j<n; j++ ) {
1493           zb = z + bs*(*idx++);
1494           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1495           workt += bs;
1496         }
1497       }
1498     }
1499   }
1500   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1501   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1502   PLogFlops(2*a->nz*a->bs2 - a->n);
1503   return 0;
1504 }
1505 
1506 #undef __FUNC__
1507 #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1508 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1509 {
1510   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1511   Scalar          *xg,*zg,*zb;
1512   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1513   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1514   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1515 
1516 
1517 
1518   VecGetArray_Fast(xx,xg); x = xg;
1519   VecGetArray_Fast(zz,zg); z = zg;
1520 
1521   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1522   else PetscMemzero(z,N*sizeof(Scalar));
1523 
1524 
1525   idx   = a->j;
1526   v     = a->a;
1527   ii    = a->i;
1528 
1529   switch (bs) {
1530   case 1:
1531     for ( i=0; i<mbs; i++ ) {
1532       n  = ii[1] - ii[0]; ii++;
1533       xb = x + i; x1 = xb[0];
1534       ib = idx + ai[i];
1535       for ( j=0; j<n; j++ ) {
1536         rval    = ib[j];
1537         z[rval] += *v++ * x1;
1538       }
1539     }
1540     break;
1541   case 2:
1542     for ( i=0; i<mbs; i++ ) {
1543       n  = ii[1] - ii[0]; ii++;
1544       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1545       ib = idx + ai[i];
1546       for ( j=0; j<n; j++ ) {
1547         rval      = ib[j]*2;
1548         z[rval++] += v[0]*x1 + v[1]*x2;
1549         z[rval++] += v[2]*x1 + v[3]*x2;
1550         v += 4;
1551       }
1552     }
1553     break;
1554   case 3:
1555     for ( i=0; i<mbs; i++ ) {
1556       n  = ii[1] - ii[0]; ii++;
1557       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1558       ib = idx + ai[i];
1559       for ( j=0; j<n; j++ ) {
1560         rval      = ib[j]*3;
1561         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1562         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1563         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1564         v += 9;
1565       }
1566     }
1567     break;
1568   case 4:
1569     for ( i=0; i<mbs; i++ ) {
1570       n  = ii[1] - ii[0]; ii++;
1571       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1572       ib = idx + ai[i];
1573       for ( j=0; j<n; j++ ) {
1574         rval      = ib[j]*4;
1575         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1576         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1577         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1578         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1579         v += 16;
1580       }
1581     }
1582     break;
1583   case 5:
1584     for ( i=0; i<mbs; i++ ) {
1585       n  = ii[1] - ii[0]; ii++;
1586       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1587       x4 = xb[3];   x5 = xb[4];
1588       ib = idx + ai[i];
1589       for ( j=0; j<n; j++ ) {
1590         rval      = ib[j]*5;
1591         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1592         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1593         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1594         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1595         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1596         v += 25;
1597       }
1598     }
1599     break;
1600       /* block sizes larger then 5 by 5 are handled by BLAS */
1601     default: {
1602       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1603       if (!a->mult_work) {
1604         k = PetscMax(a->m,a->n);
1605         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1606         CHKPTRQ(a->mult_work);
1607       }
1608       work = a->mult_work;
1609       for ( i=0; i<mbs; i++ ) {
1610         n     = ii[1] - ii[0]; ii++;
1611         ncols = n*bs;
1612         PetscMemzero(work,ncols*sizeof(Scalar));
1613         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1614         v += n*bs2;
1615         x += bs;
1616         workt = work;
1617         for ( j=0; j<n; j++ ) {
1618           zb = z + bs*(*idx++);
1619           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1620           workt += bs;
1621         }
1622       }
1623     }
1624   }
1625   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1626   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1627   PLogFlops(2*a->nz*a->bs2);
1628   return 0;
1629 }
1630 
1631 #undef __FUNC__
1632 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
1633 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1634 {
1635   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1636 
1637   info->rows_global    = (double)a->m;
1638   info->columns_global = (double)a->n;
1639   info->rows_local     = (double)a->m;
1640   info->columns_local  = (double)a->n;
1641   info->block_size     = a->bs2;
1642   info->nz_allocated   = a->maxnz;
1643   info->nz_used        = a->bs2*a->nz;
1644   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1645   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1646     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1647   info->assemblies   = A->num_ass;
1648   info->mallocs      = a->reallocs;
1649   info->memory       = A->mem;
1650   if (A->factor) {
1651     info->fill_ratio_given  = A->info.fill_ratio_given;
1652     info->fill_ratio_needed = A->info.fill_ratio_needed;
1653     info->factor_mallocs    = A->info.factor_mallocs;
1654   } else {
1655     info->fill_ratio_given  = 0;
1656     info->fill_ratio_needed = 0;
1657     info->factor_mallocs    = 0;
1658   }
1659   return 0;
1660 }
1661 
1662 #undef __FUNC__
1663 #define __FUNC__ "MatEqual_SeqBAIJ"
1664 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1665 {
1666   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1667 
1668   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
1669 
1670   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1671   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1672       (a->nz != b->nz)) {
1673     *flg = PETSC_FALSE; return 0;
1674   }
1675 
1676   /* if the a->i are the same */
1677   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1678     *flg = PETSC_FALSE; return 0;
1679   }
1680 
1681   /* if a->j are the same */
1682   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1683     *flg = PETSC_FALSE; return 0;
1684   }
1685 
1686   /* if a->a are the same */
1687   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1688     *flg = PETSC_FALSE; return 0;
1689   }
1690   *flg = PETSC_TRUE;
1691   return 0;
1692 
1693 }
1694 
1695 #undef __FUNC__
1696 #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
1697 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1698 {
1699   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1700   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1701   Scalar      *x, zero = 0.0,*aa,*aa_j;
1702 
1703   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
1704   bs   = a->bs;
1705   aa   = a->a;
1706   ai   = a->i;
1707   aj   = a->j;
1708   ambs = a->mbs;
1709   bs2  = a->bs2;
1710 
1711   VecSet(&zero,v);
1712   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1713   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
1714   for ( i=0; i<ambs; i++ ) {
1715     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1716       if (aj[j] == i) {
1717         row  = i*bs;
1718         aa_j = aa+j*bs2;
1719         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1720         break;
1721       }
1722     }
1723   }
1724   return 0;
1725 }
1726 
1727 #undef __FUNC__
1728 #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
1729 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1730 {
1731   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1732   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1733   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1734 
1735   ai  = a->i;
1736   aj  = a->j;
1737   aa  = a->a;
1738   m   = a->m;
1739   n   = a->n;
1740   bs  = a->bs;
1741   mbs = a->mbs;
1742   bs2 = a->bs2;
1743   if (ll) {
1744     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1745     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
1746     for ( i=0; i<mbs; i++ ) { /* for each block row */
1747       M  = ai[i+1] - ai[i];
1748       li = l + i*bs;
1749       v  = aa + bs2*ai[i];
1750       for ( j=0; j<M; j++ ) { /* for each block */
1751         for ( k=0; k<bs2; k++ ) {
1752           (*v++) *= li[k%bs];
1753         }
1754       }
1755     }
1756   }
1757 
1758   if (rr) {
1759     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1760     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
1761     for ( i=0; i<mbs; i++ ) { /* for each block row */
1762       M  = ai[i+1] - ai[i];
1763       v  = aa + bs2*ai[i];
1764       for ( j=0; j<M; j++ ) { /* for each block */
1765         ri = r + bs*aj[ai[i]+j];
1766         for ( k=0; k<bs; k++ ) {
1767           x = ri[k];
1768           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1769         }
1770       }
1771     }
1772   }
1773   return 0;
1774 }
1775 
1776 
1777 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1778 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1779 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1780 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1781 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1782 
1783 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1784 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1785 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1786 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1787 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1788 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1789 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1790 
1791 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1792 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1793 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1794 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1795 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1796 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1797 
1798 #undef __FUNC__
1799 #define __FUNC__ "MatNorm_SeqBAIJ"
1800 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1801 {
1802   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1803   Scalar      *v = a->a;
1804   double      sum = 0.0;
1805   int         i,nz=a->nz,bs2=a->bs2;
1806 
1807   if (type == NORM_FROBENIUS) {
1808     for (i=0; i< bs2*nz; i++ ) {
1809 #if defined(PETSC_COMPLEX)
1810       sum += real(conj(*v)*(*v)); v++;
1811 #else
1812       sum += (*v)*(*v); v++;
1813 #endif
1814     }
1815     *norm = sqrt(sum);
1816   }
1817   else {
1818     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
1819   }
1820   return 0;
1821 }
1822 
1823 /*
1824      note: This can only work for identity for row and col. It would
1825    be good to check this and otherwise generate an error.
1826 */
1827 #undef __FUNC__
1828 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1829 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1830 {
1831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1832   Mat         outA;
1833   int         ierr;
1834 
1835   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
1836 
1837   outA          = inA;
1838   inA->factor   = FACTOR_LU;
1839   a->row        = row;
1840   a->col        = col;
1841 
1842   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1843 
1844   if (!a->diag) {
1845     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1846   }
1847   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1848   return 0;
1849 }
1850 
1851 #undef __FUNC__
1852 #define __FUNC__ "MatScale_SeqBAIJ"
1853 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1854 {
1855   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1856   int         one = 1, totalnz = a->bs2*a->nz;
1857   BLscal_( &totalnz, alpha, a->a, &one );
1858   PLogFlops(totalnz);
1859   return 0;
1860 }
1861 
1862 #undef __FUNC__
1863 #define __FUNC__ "MatGetValues_SeqBAIJ"
1864 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1865 {
1866   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1867   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1868   int        *ai = a->i, *ailen = a->ilen;
1869   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1870   Scalar     *ap, *aa = a->a, zero = 0.0;
1871 
1872   for ( k=0; k<m; k++ ) { /* loop over rows */
1873     row  = im[k]; brow = row/bs;
1874     if (row < 0) SETERRQ(1,0,"Negative row");
1875     if (row >= a->m) SETERRQ(1,0,"Row too large");
1876     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1877     nrow = ailen[brow];
1878     for ( l=0; l<n; l++ ) { /* loop over columns */
1879       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1880       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1881       col  = in[l] ;
1882       bcol = col/bs;
1883       cidx = col%bs;
1884       ridx = row%bs;
1885       high = nrow;
1886       low  = 0; /* assume unsorted */
1887       while (high-low > 5) {
1888         t = (low+high)/2;
1889         if (rp[t] > bcol) high = t;
1890         else             low  = t;
1891       }
1892       for ( i=low; i<high; i++ ) {
1893         if (rp[i] > bcol) break;
1894         if (rp[i] == bcol) {
1895           *v++ = ap[bs2*i+bs*cidx+ridx];
1896           goto finished;
1897         }
1898       }
1899       *v++ = zero;
1900       finished:;
1901     }
1902   }
1903   return 0;
1904 }
1905 
1906 #undef __FUNC__
1907 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
1908 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1909 {
1910   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1911   *bs = baij->bs;
1912   return 0;
1913 }
1914 
1915 /* idx should be of length atlease bs */
1916 #undef __FUNC__
1917 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1918 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1919 {
1920   int i,row;
1921   row = idx[0];
1922   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1923 
1924   for ( i=1; i<bs; i++ ) {
1925     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1926   }
1927   *flg = PETSC_TRUE;
1928   return 0;
1929 }
1930 
1931 #undef __FUNC__
1932 #define __FUNC__ "MatZeroRows_SeqBAIJ"
1933 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1934 {
1935   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1936   IS          is_local;
1937   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1938   PetscTruth  flg;
1939   Scalar      zero = 0.0,*aa;
1940 
1941   /* Make a copy of the IS and  sort it */
1942   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1943   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1944   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1945   ierr = ISSort(is_local); CHKERRQ(ierr);
1946   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1947 
1948   i = 0;
1949   while (i < is_n) {
1950     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1951     flg = PETSC_FALSE;
1952     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1953     if (flg) { /* There exists a block of rows to be Zerowed */
1954       baij->ilen[rows[i]/bs] = 0;
1955       i += bs;
1956     } else { /* Zero out only the requested row */
1957       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1958       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1959       for ( j=0; j<count; j++ ) {
1960         aa[0] = zero;
1961         aa+=bs;
1962       }
1963       i++;
1964     }
1965   }
1966   if (diag) {
1967     for ( j=0; j<is_n; j++ ) {
1968       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1969     }
1970   }
1971   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1972   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1973   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1974   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1975 
1976   return 0;
1977 }
1978 
1979 #undef __FUNC__
1980 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
1981 int MatPrintHelp_SeqBAIJ(Mat A)
1982 {
1983   static int called = 0;
1984   MPI_Comm   comm = A->comm;
1985 
1986   if (called) return 0; else called = 1;
1987   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1988   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1989   return 0;
1990 }
1991 
1992 /* -------------------------------------------------------------------*/
1993 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1994        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1995        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1996        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1997        MatSolve_SeqBAIJ_N,0,
1998        0,0,
1999        MatLUFactor_SeqBAIJ,0,
2000        0,
2001        MatTranspose_SeqBAIJ,
2002        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
2003        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2004        0,MatAssemblyEnd_SeqBAIJ,
2005        0,
2006        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
2007        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2008        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2009        MatILUFactorSymbolic_SeqBAIJ,0,
2010        0,0,
2011        MatConvertSameType_SeqBAIJ,0,0,
2012        MatILUFactor_SeqBAIJ,0,0,
2013        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
2014        MatGetValues_SeqBAIJ,0,
2015        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
2016        0,0,0,MatGetBlockSize_SeqBAIJ,
2017        MatGetRowIJ_SeqBAIJ,
2018        MatRestoreRowIJ_SeqBAIJ,
2019        0,0,0,0,0,0,
2020        MatSetValuesBlocked_SeqBAIJ};
2021 
2022 #undef __FUNC__
2023 #define __FUNC__ "MatCreateSeqBAIJ"
2024 /*@C
2025    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2026    compressed row) format.  For good matrix assembly performance the
2027    user should preallocate the matrix storage by setting the parameter nz
2028    (or the array nzz).  By setting these parameters accurately, performance
2029    during matrix assembly can be increased by more than a factor of 50.
2030 
2031    Input Parameters:
2032 .  comm - MPI communicator, set to PETSC_COMM_SELF
2033 .  bs - size of block
2034 .  m - number of rows
2035 .  n - number of columns
2036 .  nz - number of block nonzeros per block row (same for all rows)
2037 .  nzz - array containing the number of block nonzeros in the various block rows
2038          (possibly different for each block row) or PETSC_NULL
2039 
2040    Output Parameter:
2041 .  A - the matrix
2042 
2043    Options Database Keys:
2044 $    -mat_no_unroll - uses code that does not unroll the loops in the
2045 $                     block calculations (much solver)
2046 $    -mat_block_size - size of the blocks to use
2047 
2048    Notes:
2049    The block AIJ format is fully compatible with standard Fortran 77
2050    storage.  That is, the stored row and column indices can begin at
2051    either one (as in Fortran) or zero.  See the users' manual for details.
2052 
2053    Specify the preallocated storage with either nz or nnz (not both).
2054    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2055    allocation.  For additional details, see the users manual chapter on
2056    matrices.
2057 
2058 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2059 @*/
2060 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
2061 {
2062   Mat         B;
2063   Mat_SeqBAIJ *b;
2064   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
2065 
2066   MPI_Comm_size(comm,&size);
2067   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2068 
2069   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
2070 
2071   if (mbs*bs!=m || nbs*bs!=n)
2072     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
2073 
2074   *A = 0;
2075   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
2076   PLogObjectCreate(B);
2077   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
2078   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2079   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
2080   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
2081   if (!flg) {
2082     switch (bs) {
2083     case 1:
2084       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2085       B->ops.solve           = MatSolve_SeqBAIJ_1;
2086       B->ops.mult            = MatMult_SeqBAIJ_1;
2087       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
2088       break;
2089     case 2:
2090       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2091       B->ops.solve           = MatSolve_SeqBAIJ_2;
2092       B->ops.mult            = MatMult_SeqBAIJ_2;
2093       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
2094       break;
2095     case 3:
2096       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2097       B->ops.solve           = MatSolve_SeqBAIJ_3;
2098       B->ops.mult            = MatMult_SeqBAIJ_3;
2099       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
2100       break;
2101     case 4:
2102       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2103       B->ops.solve           = MatSolve_SeqBAIJ_4;
2104       B->ops.mult            = MatMult_SeqBAIJ_4;
2105       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
2106       break;
2107     case 5:
2108       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2109       B->ops.solve           = MatSolve_SeqBAIJ_5;
2110       B->ops.mult            = MatMult_SeqBAIJ_5;
2111       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
2112       break;
2113     case 7:
2114       B->ops.mult            = MatMult_SeqBAIJ_7;
2115       B->ops.solve           = MatSolve_SeqBAIJ_7;
2116       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2117       break;
2118     }
2119   }
2120   B->destroy          = MatDestroy_SeqBAIJ;
2121   B->view             = MatView_SeqBAIJ;
2122   B->factor           = 0;
2123   B->lupivotthreshold = 1.0;
2124   B->mapping          = 0;
2125   b->row              = 0;
2126   b->col              = 0;
2127   b->reallocs         = 0;
2128 
2129   b->m       = m; B->m = m; B->M = m;
2130   b->n       = n; B->n = n; B->N = n;
2131   b->mbs     = mbs;
2132   b->nbs     = nbs;
2133   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
2134   if (nnz == PETSC_NULL) {
2135     if (nz == PETSC_DEFAULT) nz = 5;
2136     else if (nz <= 0)        nz = 1;
2137     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2138     nz = nz*mbs;
2139   }
2140   else {
2141     nz = 0;
2142     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
2143   }
2144 
2145   /* allocate the matrix space */
2146   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
2147   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
2148   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
2149   b->j  = (int *) (b->a + nz*bs2);
2150   PetscMemzero(b->j,nz*sizeof(int));
2151   b->i  = b->j + nz;
2152   b->singlemalloc = 1;
2153 
2154   b->i[0] = 0;
2155   for (i=1; i<mbs+1; i++) {
2156     b->i[i] = b->i[i-1] + b->imax[i-1];
2157   }
2158 
2159   /* b->ilen will count nonzeros in each block row so far. */
2160   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2161   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2162   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
2163 
2164   b->bs               = bs;
2165   b->bs2              = bs2;
2166   b->mbs              = mbs;
2167   b->nz               = 0;
2168   b->maxnz            = nz*bs2;
2169   b->sorted           = 0;
2170   b->roworiented      = 1;
2171   b->nonew            = 0;
2172   b->diag             = 0;
2173   b->solve_work       = 0;
2174   b->mult_work        = 0;
2175   b->spptr            = 0;
2176   B->info.nz_unneeded = (double)b->maxnz;
2177 
2178   *A = B;
2179   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
2180   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
2181   return 0;
2182 }
2183 
2184 #undef __FUNC__
2185 #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2186 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
2187 {
2188   Mat         C;
2189   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
2190   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2191 
2192   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
2193 
2194   *B = 0;
2195   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
2196   PLogObjectCreate(C);
2197   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
2198   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2199   C->destroy    = MatDestroy_SeqBAIJ;
2200   C->view       = MatView_SeqBAIJ;
2201   C->factor     = A->factor;
2202   c->row        = 0;
2203   c->col        = 0;
2204   C->assembled  = PETSC_TRUE;
2205 
2206   c->m = C->m   = a->m;
2207   c->n = C->n   = a->n;
2208   C->M          = a->m;
2209   C->N          = a->n;
2210 
2211   c->bs         = a->bs;
2212   c->bs2        = a->bs2;
2213   c->mbs        = a->mbs;
2214   c->nbs        = a->nbs;
2215 
2216   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2217   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2218   for ( i=0; i<mbs; i++ ) {
2219     c->imax[i] = a->imax[i];
2220     c->ilen[i] = a->ilen[i];
2221   }
2222 
2223   /* allocate the matrix space */
2224   c->singlemalloc = 1;
2225   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
2226   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
2227   c->j  = (int *) (c->a + nz*bs2);
2228   c->i  = c->j + nz;
2229   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2230   if (mbs > 0) {
2231     PetscMemcpy(c->j,a->j,nz*sizeof(int));
2232     if (cpvalues == COPY_VALUES) {
2233       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
2234     }
2235   }
2236 
2237   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2238   c->sorted      = a->sorted;
2239   c->roworiented = a->roworiented;
2240   c->nonew       = a->nonew;
2241 
2242   if (a->diag) {
2243     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2244     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2245     for ( i=0; i<mbs; i++ ) {
2246       c->diag[i] = a->diag[i];
2247     }
2248   }
2249   else c->diag          = 0;
2250   c->nz                 = a->nz;
2251   c->maxnz              = a->maxnz;
2252   c->solve_work         = 0;
2253   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
2254   c->mult_work          = 0;
2255   *B = C;
2256   return 0;
2257 }
2258 
2259 #undef __FUNC__
2260 #define __FUNC__ "MatLoad_SeqBAIJ"
2261 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
2262 {
2263   Mat_SeqBAIJ  *a;
2264   Mat          B;
2265   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2266   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2267   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2268   int          *masked, nmask,tmp,bs2,ishift;
2269   Scalar       *aa;
2270   MPI_Comm     comm = ((PetscObject) viewer)->comm;
2271 
2272   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2273   bs2  = bs*bs;
2274 
2275   MPI_Comm_size(comm,&size);
2276   if (size > 1) SETERRQ(1,0,"view must have one processor");
2277   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2278   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2279   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
2280   M = header[1]; N = header[2]; nz = header[3];
2281 
2282   if (M != N) SETERRQ(1,0,"Can only do square matrices");
2283 
2284   /*
2285      This code adds extra rows to make sure the number of rows is
2286     divisible by the blocksize
2287   */
2288   mbs        = M/bs;
2289   extra_rows = bs - M + bs*(mbs);
2290   if (extra_rows == bs) extra_rows = 0;
2291   else                  mbs++;
2292   if (extra_rows) {
2293     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2294   }
2295 
2296   /* read in row lengths */
2297   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2298   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
2299   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2300 
2301   /* read in column indices */
2302   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2303   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
2304   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2305 
2306   /* loop over row lengths determining block row lengths */
2307   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2308   PetscMemzero(browlengths,mbs*sizeof(int));
2309   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
2310   PetscMemzero(mask,mbs*sizeof(int));
2311   masked = mask + mbs;
2312   rowcount = 0; nzcount = 0;
2313   for ( i=0; i<mbs; i++ ) {
2314     nmask = 0;
2315     for ( j=0; j<bs; j++ ) {
2316       kmax = rowlengths[rowcount];
2317       for ( k=0; k<kmax; k++ ) {
2318         tmp = jj[nzcount++]/bs;
2319         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2320       }
2321       rowcount++;
2322     }
2323     browlengths[i] += nmask;
2324     /* zero out the mask elements we set */
2325     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2326   }
2327 
2328   /* create our matrix */
2329   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
2330          CHKERRQ(ierr);
2331   B = *A;
2332   a = (Mat_SeqBAIJ *) B->data;
2333 
2334   /* set matrix "i" values */
2335   a->i[0] = 0;
2336   for ( i=1; i<= mbs; i++ ) {
2337     a->i[i]      = a->i[i-1] + browlengths[i-1];
2338     a->ilen[i-1] = browlengths[i-1];
2339   }
2340   a->nz         = 0;
2341   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2342 
2343   /* read in nonzero values */
2344   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2345   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2346   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2347 
2348   /* set "a" and "j" values into matrix */
2349   nzcount = 0; jcount = 0;
2350   for ( i=0; i<mbs; i++ ) {
2351     nzcountb = nzcount;
2352     nmask    = 0;
2353     for ( j=0; j<bs; j++ ) {
2354       kmax = rowlengths[i*bs+j];
2355       for ( k=0; k<kmax; k++ ) {
2356         tmp = jj[nzcount++]/bs;
2357 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2358       }
2359       rowcount++;
2360     }
2361     /* sort the masked values */
2362     PetscSortInt(nmask,masked);
2363 
2364     /* set "j" values into matrix */
2365     maskcount = 1;
2366     for ( j=0; j<nmask; j++ ) {
2367       a->j[jcount++]  = masked[j];
2368       mask[masked[j]] = maskcount++;
2369     }
2370     /* set "a" values into matrix */
2371     ishift = bs2*a->i[i];
2372     for ( j=0; j<bs; j++ ) {
2373       kmax = rowlengths[i*bs+j];
2374       for ( k=0; k<kmax; k++ ) {
2375         tmp    = jj[nzcountb]/bs ;
2376         block  = mask[tmp] - 1;
2377         point  = jj[nzcountb] - bs*tmp;
2378         idx    = ishift + bs2*block + j + bs*point;
2379         a->a[idx] = aa[nzcountb++];
2380       }
2381     }
2382     /* zero out the mask elements we set */
2383     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2384   }
2385   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2386 
2387   PetscFree(rowlengths);
2388   PetscFree(browlengths);
2389   PetscFree(aa);
2390   PetscFree(jj);
2391   PetscFree(mask);
2392 
2393   B->assembled = PETSC_TRUE;
2394 
2395   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2396   if (flg) {
2397     Viewer tviewer;
2398     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2399     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2400     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2401     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2402   }
2403   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2404   if (flg) {
2405     Viewer tviewer;
2406     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2407     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2408     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2409     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2410   }
2411   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2412   if (flg) {
2413     Viewer tviewer;
2414     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2415     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2416     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2417   }
2418   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2419   if (flg) {
2420     Viewer tviewer;
2421     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2422     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2423     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2424     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2425   }
2426   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2427   if (flg) {
2428     Viewer tviewer;
2429     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2430     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2431     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2432     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2433   }
2434   return 0;
2435 }
2436 
2437 
2438 
2439