xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f1241b543c8a967e1ec0272e804ddc570efd5e33)
12593348eSBarry Smith #ifndef lint
2*f1241b54SBarry Smith static char vcid[] = "$Id: baij.c,v 1.90 1997/02/07 16:33:49 balay Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
195615d1e5SSatish Balay #undef __FUNC__
205615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
22de6a44a3SBarry Smith {
23de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
247fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
25de6a44a3SBarry Smith 
26de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
287fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
29de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30de6a44a3SBarry Smith       if (a->j[j] == i) {
31de6a44a3SBarry Smith         diag[i] = j;
32de6a44a3SBarry Smith         break;
33de6a44a3SBarry Smith       }
34de6a44a3SBarry Smith     }
35de6a44a3SBarry Smith   }
36de6a44a3SBarry Smith   a->diag = diag;
37de6a44a3SBarry Smith   return 0;
38de6a44a3SBarry Smith }
392593348eSBarry Smith 
402593348eSBarry Smith #include "draw.h"
412593348eSBarry Smith #include "pinclude/pviewer.h"
4277c4ece6SBarry Smith #include "sys.h"
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
1197e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
1347e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
2175615d1e5SSatish Balay #undef __FUNC__
2185615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw"
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
3445615d1e5SSatish Balay #undef __FUNC__
3455615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ"
346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
3705615d1e5SSatish Balay #undef __FUNC__
3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417*f1241b54SBarry Smith           goto noinsert1;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420*f1241b54SBarry Smith       if (nonew) goto noinsert1;
421cd0e1443SSatish Balay       if (nrow >= rmax) {
422cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
423cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424cd0e1443SSatish Balay         Scalar *new_a;
425cd0e1443SSatish Balay 
426cd0e1443SSatish Balay         /* malloc new storage space */
4277e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4297e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
430cd0e1443SSatish Balay         new_i   = new_j + new_nz;
431cd0e1443SSatish Balay 
432cd0e1443SSatish Balay         /* copy over old data into new slots */
433cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4347e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438cd0e1443SSatish Balay                                                            len*sizeof(int));
439a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443cd0e1443SSatish Balay         /* free up old matrix storage */
444cd0e1443SSatish Balay         PetscFree(a->a);
445cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447cd0e1443SSatish Balay         a->singlemalloc = 1;
448cd0e1443SSatish Balay 
449a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4517e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
453cd0e1443SSatish Balay         a->reallocs++;
454119a934aSSatish Balay         a->nz++;
455cd0e1443SSatish Balay       }
4567e67e3f9SSatish Balay       N = nrow++ - 1;
457cd0e1443SSatish Balay       /* shift up all the later entries in this row */
458cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
459cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4607e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461cd0e1443SSatish Balay       }
4627e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463cd0e1443SSatish Balay       rp[i]                      = bcol;
4647e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
465*f1241b54SBarry Smith       noinsert1:;
466cd0e1443SSatish Balay       low = i;
467cd0e1443SSatish Balay     }
468cd0e1443SSatish Balay     ailen[brow] = nrow;
469cd0e1443SSatish Balay   }
470cd0e1443SSatish Balay   return 0;
471cd0e1443SSatish Balay }
472cd0e1443SSatish Balay 
4735615d1e5SSatish Balay #undef __FUNC__
47492c4ed94SBarry Smith #define __FUNC__ "MatSetValues_SeqBAIJ"
47592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
47692c4ed94SBarry Smith {
47792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4788a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
4790e324ae4SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
4800e324ae4SSatish Balay   int         roworiented=a->roworiented;
4818a84c255SSatish Balay   int         *aj=a->j,nonew=a->nonew;
4820e324ae4SSatish Balay   int          bs2=a->bs2,bs=a->bs,stepval;
4830e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
48492c4ed94SBarry Smith 
4850e324ae4SSatish Balay   if (roworiented) {
4860e324ae4SSatish Balay     stepval = (n-1)*bs;
4870e324ae4SSatish Balay   } else {
4880e324ae4SSatish Balay     stepval = (m-1)*bs;
4890e324ae4SSatish Balay   }
49092c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
49192c4ed94SBarry Smith     row  = im[k];
49292c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
49392c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
4948a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
49592c4ed94SBarry Smith #endif
49692c4ed94SBarry Smith     rp   = aj + ai[row];
49792c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
49892c4ed94SBarry Smith     rmax = imax[row];
49992c4ed94SBarry Smith     nrow = ailen[row];
50092c4ed94SBarry Smith     low  = 0;
50192c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
50292c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50392c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5048a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
50592c4ed94SBarry Smith #endif
50692c4ed94SBarry Smith       col = in[l];
50792c4ed94SBarry Smith       if (roworiented) {
5080e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5090e324ae4SSatish Balay       } else {
5100e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
51192c4ed94SBarry Smith       }
51292c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
51392c4ed94SBarry Smith       while (high-low > 7) {
51492c4ed94SBarry Smith         t = (low+high)/2;
51592c4ed94SBarry Smith         if (rp[t] > col) high = t;
51692c4ed94SBarry Smith         else             low  = t;
51792c4ed94SBarry Smith       }
51892c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
51992c4ed94SBarry Smith         if (rp[i] > col) break;
52092c4ed94SBarry Smith         if (rp[i] == col) {
5218a84c255SSatish Balay           bap  = ap +  bs2*i;
5220e324ae4SSatish Balay           if (roworiented) {
5238a84c255SSatish Balay             if (is == ADD_VALUES) {
5240e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5258a84c255SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5268a84c255SSatish Balay                   bap[jj] += *value++;
5270e324ae4SSatish Balay             } else {
5280e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5290e324ae4SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5300e324ae4SSatish Balay                   bap[jj] = *value++;
5318a84c255SSatish Balay             }
5320e324ae4SSatish Balay           } else {
5330e324ae4SSatish Balay             if (is == ADD_VALUES) {
5340e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5350e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5360e324ae4SSatish Balay                   *bap++ += *value++;
5370e324ae4SSatish Balay             } else {
5380e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5390e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5400e324ae4SSatish Balay                   *bap++  = *value++;
5410e324ae4SSatish Balay             }
5428a84c255SSatish Balay           }
543*f1241b54SBarry Smith           goto noinsert2;
54492c4ed94SBarry Smith         }
54592c4ed94SBarry Smith       }
546*f1241b54SBarry Smith       if (nonew) goto noinsert2;
54792c4ed94SBarry Smith       if (nrow >= rmax) {
54892c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
54992c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
55092c4ed94SBarry Smith         Scalar *new_a;
55192c4ed94SBarry Smith 
55292c4ed94SBarry Smith         /* malloc new storage space */
55392c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
55492c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
55592c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
55692c4ed94SBarry Smith         new_i   = new_j + new_nz;
55792c4ed94SBarry Smith 
55892c4ed94SBarry Smith         /* copy over old data into new slots */
55992c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
56092c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
56192c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
56292c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
56392c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
56492c4ed94SBarry Smith                                                            len*sizeof(int));
56592c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
56692c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
56792c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
56892c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
56992c4ed94SBarry Smith         /* free up old matrix storage */
57092c4ed94SBarry Smith         PetscFree(a->a);
57192c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
57292c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
57392c4ed94SBarry Smith         a->singlemalloc = 1;
57492c4ed94SBarry Smith 
57592c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
57692c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
57792c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
57892c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
57992c4ed94SBarry Smith         a->reallocs++;
58092c4ed94SBarry Smith         a->nz++;
58192c4ed94SBarry Smith       }
58292c4ed94SBarry Smith       N = nrow++ - 1;
58392c4ed94SBarry Smith       /* shift up all the later entries in this row */
58492c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
58592c4ed94SBarry Smith         rp[ii+1] = rp[ii];
58692c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
58792c4ed94SBarry Smith       }
58892c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
58992c4ed94SBarry Smith       rp[i] = col;
5908a84c255SSatish Balay       bap   = ap +  bs2*i;
5910e324ae4SSatish Balay       if (roworiented) {
5920e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval)
5938a84c255SSatish Balay           for (jj=ii; jj<bs2; jj+=bs )
5940e324ae4SSatish Balay             bap[jj] = *value++;
5950e324ae4SSatish Balay       } else {
5960e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval )
5970e324ae4SSatish Balay           for (jj=0; jj<bs; jj++ )
5980e324ae4SSatish Balay             *bap++  = *value++;
5990e324ae4SSatish Balay       }
600*f1241b54SBarry Smith       noinsert2:;
60192c4ed94SBarry Smith       low = i;
60292c4ed94SBarry Smith     }
60392c4ed94SBarry Smith     ailen[row] = nrow;
60492c4ed94SBarry Smith   }
60592c4ed94SBarry Smith   return 0;
60692c4ed94SBarry Smith }
60792c4ed94SBarry Smith 
60892c4ed94SBarry Smith #undef __FUNC__
6095615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ"
6100b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6110b824a48SSatish Balay {
6120b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6130b824a48SSatish Balay   *m = a->m; *n = a->n;
6140b824a48SSatish Balay   return 0;
6150b824a48SSatish Balay }
6160b824a48SSatish Balay 
6175615d1e5SSatish Balay #undef __FUNC__
6185615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
6190b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6200b824a48SSatish Balay {
6210b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6220b824a48SSatish Balay   *m = 0; *n = a->m;
6230b824a48SSatish Balay   return 0;
6240b824a48SSatish Balay }
6250b824a48SSatish Balay 
6265615d1e5SSatish Balay #undef __FUNC__
6275615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6289867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6299867e207SSatish Balay {
6309867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6317e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6329867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6339867e207SSatish Balay 
6349867e207SSatish Balay   bs  = a->bs;
6359867e207SSatish Balay   ai  = a->i;
6369867e207SSatish Balay   aj  = a->j;
6379867e207SSatish Balay   aa  = a->a;
6389df24120SSatish Balay   bs2 = a->bs2;
6399867e207SSatish Balay 
640e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6419867e207SSatish Balay 
6429867e207SSatish Balay   bn  = row/bs;   /* Block number */
6439867e207SSatish Balay   bp  = row % bs; /* Block Position */
6449867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6459867e207SSatish Balay   *nz = bs*M;
6469867e207SSatish Balay 
6479867e207SSatish Balay   if (v) {
6489867e207SSatish Balay     *v = 0;
6499867e207SSatish Balay     if (*nz) {
6509867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6519867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6529867e207SSatish Balay         v_i  = *v + i*bs;
6537e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6547e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6559867e207SSatish Balay       }
6569867e207SSatish Balay     }
6579867e207SSatish Balay   }
6589867e207SSatish Balay 
6599867e207SSatish Balay   if (idx) {
6609867e207SSatish Balay     *idx = 0;
6619867e207SSatish Balay     if (*nz) {
6629867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6639867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6649867e207SSatish Balay         idx_i = *idx + i*bs;
6659867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6669867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6679867e207SSatish Balay       }
6689867e207SSatish Balay     }
6699867e207SSatish Balay   }
6709867e207SSatish Balay   return 0;
6719867e207SSatish Balay }
6729867e207SSatish Balay 
6735615d1e5SSatish Balay #undef __FUNC__
6745615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6759867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6769867e207SSatish Balay {
6779867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6789867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6799867e207SSatish Balay   return 0;
6809867e207SSatish Balay }
681b6490206SBarry Smith 
6825615d1e5SSatish Balay #undef __FUNC__
6835615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
684f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
685f2501298SSatish Balay {
686f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
687f2501298SSatish Balay   Mat         C;
688f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
6899df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
690f2501298SSatish Balay   Scalar      *array=a->a;
691f2501298SSatish Balay 
692f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
693e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
694f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
695f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
696a7c10996SSatish Balay 
697a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
698f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
699f2501298SSatish Balay   PetscFree(col);
700f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
701f2501298SSatish Balay   cols = rows + bs;
702f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
703f2501298SSatish Balay     cols[0] = i*bs;
704f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
705f2501298SSatish Balay     len = ai[i+1] - ai[i];
706f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
707f2501298SSatish Balay       rows[0] = (*aj++)*bs;
708f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
709f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
710f2501298SSatish Balay       array += bs2;
711f2501298SSatish Balay     }
712f2501298SSatish Balay   }
7131073c447SSatish Balay   PetscFree(rows);
714f2501298SSatish Balay 
7156d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7166d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
717f2501298SSatish Balay 
718f2501298SSatish Balay   if (B != PETSC_NULL) {
719f2501298SSatish Balay     *B = C;
720f2501298SSatish Balay   } else {
721f2501298SSatish Balay     /* This isn't really an in-place transpose */
722f2501298SSatish Balay     PetscFree(a->a);
723f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
724f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
725f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
726f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
727f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
728f2501298SSatish Balay     PetscFree(a);
729f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
730f2501298SSatish Balay     PetscHeaderDestroy(C);
731f2501298SSatish Balay   }
732f2501298SSatish Balay   return 0;
733f2501298SSatish Balay }
734f2501298SSatish Balay 
735f2501298SSatish Balay 
7365615d1e5SSatish Balay #undef __FUNC__
7375615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
738584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
739584200bdSSatish Balay {
740584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
741584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
742a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
743d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
744584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
745584200bdSSatish Balay 
7466d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
747584200bdSSatish Balay 
748d402145bSBarry Smith   rmax = ailen[0];
749584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
750584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
751584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
752d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
753584200bdSSatish Balay     if (fshift) {
754a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
755584200bdSSatish Balay       N = ailen[i];
756584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
757584200bdSSatish Balay         ip[j-fshift] = ip[j];
7587e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
759584200bdSSatish Balay       }
760584200bdSSatish Balay     }
761584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
762584200bdSSatish Balay   }
763584200bdSSatish Balay   if (mbs) {
764584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
765584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
766584200bdSSatish Balay   }
767584200bdSSatish Balay   /* reset ilen and imax for each row */
768584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
769584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
770584200bdSSatish Balay   }
771a7c10996SSatish Balay   a->nz = ai[mbs];
772584200bdSSatish Balay 
773584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
774584200bdSSatish Balay   if (fshift && a->diag) {
775584200bdSSatish Balay     PetscFree(a->diag);
776584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
777584200bdSSatish Balay     a->diag = 0;
778584200bdSSatish Balay   }
7793d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
780219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7813d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
782584200bdSSatish Balay            a->reallocs);
783d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
784e2f3b5e9SSatish Balay   a->reallocs          = 0;
7854e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7864e220ebcSLois Curfman McInnes 
787584200bdSSatish Balay   return 0;
788584200bdSSatish Balay }
789584200bdSSatish Balay 
7905615d1e5SSatish Balay #undef __FUNC__
7915615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
792b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
7932593348eSBarry Smith {
794b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7959df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
7962593348eSBarry Smith   return 0;
7972593348eSBarry Smith }
7982593348eSBarry Smith 
7995615d1e5SSatish Balay #undef __FUNC__
8005615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ"
801b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8022593348eSBarry Smith {
8032593348eSBarry Smith   Mat         A  = (Mat) obj;
804b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
80590f02eecSBarry Smith   int         ierr;
8062593348eSBarry Smith 
8072593348eSBarry Smith #if defined(PETSC_LOG)
8082593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8092593348eSBarry Smith #endif
8102593348eSBarry Smith   PetscFree(a->a);
8112593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8122593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8132593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8142593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8152593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
816de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8172593348eSBarry Smith   PetscFree(a);
81890f02eecSBarry Smith   if (A->mapping) {
81990f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
82090f02eecSBarry Smith   }
821f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
822f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8232593348eSBarry Smith   return 0;
8242593348eSBarry Smith }
8252593348eSBarry Smith 
8265615d1e5SSatish Balay #undef __FUNC__
8275615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ"
828b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8292593348eSBarry Smith {
830b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8316d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
8326d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
8336d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
834219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
8356d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
8366d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
8376d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
838219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8396d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8406d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
84190f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
84290f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
84394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8446d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
845e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8462593348eSBarry Smith   else
847e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8482593348eSBarry Smith   return 0;
8492593348eSBarry Smith }
8502593348eSBarry Smith 
8512593348eSBarry Smith 
8522593348eSBarry Smith /* -------------------------------------------------------*/
8532593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8542593348eSBarry Smith /* -------------------------------------------------------*/
855b6490206SBarry Smith #include "pinclude/plapack.h"
856b6490206SBarry Smith 
8575615d1e5SSatish Balay #undef __FUNC__
8585615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
85939b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8602593348eSBarry Smith {
861b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
86239b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
863c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8642593348eSBarry Smith 
865c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
866c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
867b6490206SBarry Smith 
868119a934aSSatish Balay   idx   = a->j;
869119a934aSSatish Balay   v     = a->a;
870119a934aSSatish Balay   ii    = a->i;
871119a934aSSatish Balay 
872119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
873119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
874119a934aSSatish Balay     sum  = 0.0;
875119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8761a6a6d98SBarry Smith     z[i] = sum;
877119a934aSSatish Balay   }
878c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
879c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
88039b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
88139b95ed1SBarry Smith   return 0;
88239b95ed1SBarry Smith }
88339b95ed1SBarry Smith 
8845615d1e5SSatish Balay #undef __FUNC__
8855615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
88639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
88739b95ed1SBarry Smith {
88839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
88939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
89039b95ed1SBarry Smith   register Scalar x1,x2;
89139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
892c16cb8f2SBarry Smith   int             j,n;
89339b95ed1SBarry Smith 
894c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
895c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
89639b95ed1SBarry Smith 
89739b95ed1SBarry Smith   idx   = a->j;
89839b95ed1SBarry Smith   v     = a->a;
89939b95ed1SBarry Smith   ii    = a->i;
90039b95ed1SBarry Smith 
901119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
902119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
903119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
904119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
905119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
906119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
907119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
908119a934aSSatish Balay       v += 4;
909119a934aSSatish Balay     }
9101a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
911119a934aSSatish Balay     z += 2;
912119a934aSSatish Balay   }
913c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
914c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
91539b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
91639b95ed1SBarry Smith   return 0;
91739b95ed1SBarry Smith }
91839b95ed1SBarry Smith 
9195615d1e5SSatish Balay #undef __FUNC__
9205615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
92139b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
92239b95ed1SBarry Smith {
92339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
92439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
925c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
92639b95ed1SBarry Smith 
927c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
928c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
92939b95ed1SBarry Smith 
93039b95ed1SBarry Smith   idx   = a->j;
93139b95ed1SBarry Smith   v     = a->a;
93239b95ed1SBarry Smith   ii    = a->i;
93339b95ed1SBarry Smith 
934119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
935119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
936119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
937119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
938119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
939119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
940119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
941119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
942119a934aSSatish Balay       v += 9;
943119a934aSSatish Balay     }
9441a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
945119a934aSSatish Balay     z += 3;
946119a934aSSatish Balay   }
947c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
948c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
94939b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
95039b95ed1SBarry Smith   return 0;
95139b95ed1SBarry Smith }
95239b95ed1SBarry Smith 
9535615d1e5SSatish Balay #undef __FUNC__
9545615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
95539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
95639b95ed1SBarry Smith {
95739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
95839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
95939b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
96039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
961c16cb8f2SBarry Smith   int             j,n;
96239b95ed1SBarry Smith 
963c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
964c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
96539b95ed1SBarry Smith 
96639b95ed1SBarry Smith   idx   = a->j;
96739b95ed1SBarry Smith   v     = a->a;
96839b95ed1SBarry Smith   ii    = a->i;
96939b95ed1SBarry Smith 
970119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
971119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
972119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
973119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
974119a934aSSatish Balay       xb = x + 4*(*idx++);
975119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
976119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
977119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
978119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
979119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
980119a934aSSatish Balay       v += 16;
981119a934aSSatish Balay     }
9821a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
983119a934aSSatish Balay     z += 4;
984119a934aSSatish Balay   }
985c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
986c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
98739b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
98839b95ed1SBarry Smith   return 0;
98939b95ed1SBarry Smith }
99039b95ed1SBarry Smith 
9915615d1e5SSatish Balay #undef __FUNC__
9925615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
99339b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
99439b95ed1SBarry Smith {
99539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
99639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
99739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
998c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
99939b95ed1SBarry Smith 
1000c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1001c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
100239b95ed1SBarry Smith 
100339b95ed1SBarry Smith   idx   = a->j;
100439b95ed1SBarry Smith   v     = a->a;
100539b95ed1SBarry Smith   ii    = a->i;
100639b95ed1SBarry Smith 
1007119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1008119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1009119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1010119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1011119a934aSSatish Balay       xb = x + 5*(*idx++);
1012119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1013119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1014119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1015119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1016119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1017119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1018119a934aSSatish Balay       v += 25;
1019119a934aSSatish Balay     }
10201a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1021119a934aSSatish Balay     z += 5;
1022119a934aSSatish Balay   }
1023c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1024c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
102539b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
102639b95ed1SBarry Smith   return 0;
102739b95ed1SBarry Smith }
102839b95ed1SBarry Smith 
10295615d1e5SSatish Balay #undef __FUNC__
103048e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
103148e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
103248e9ddb2SSatish Balay {
103348e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
103448e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
103548e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
103648e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
103748e9ddb2SSatish Balay 
103848e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
103948e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
104048e9ddb2SSatish Balay 
104148e9ddb2SSatish Balay   idx   = a->j;
104248e9ddb2SSatish Balay   v     = a->a;
104348e9ddb2SSatish Balay   ii    = a->i;
104448e9ddb2SSatish Balay 
104548e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
104648e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
104748e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
104848e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
104948e9ddb2SSatish Balay       xb = x + 7*(*idx++);
105048e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
105148e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
105248e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
105348e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
105448e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
105548e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
105648e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
105748e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
105848e9ddb2SSatish Balay       v += 49;
105948e9ddb2SSatish Balay     }
106048e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
106148e9ddb2SSatish Balay     z += 7;
106248e9ddb2SSatish Balay   }
106348e9ddb2SSatish Balay 
106448e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
106548e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
106648e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
106748e9ddb2SSatish Balay   return 0;
106848e9ddb2SSatish Balay }
106948e9ddb2SSatish Balay 
107048e9ddb2SSatish Balay #undef __FUNC__
10715615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
107239b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
107339b95ed1SBarry Smith {
107439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
107539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1076c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1077c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1078c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
107939b95ed1SBarry Smith 
1080c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1081c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
108239b95ed1SBarry Smith 
108339b95ed1SBarry Smith   idx   = a->j;
108439b95ed1SBarry Smith   v     = a->a;
108539b95ed1SBarry Smith   ii    = a->i;
108639b95ed1SBarry Smith 
108739b95ed1SBarry Smith 
1088119a934aSSatish Balay   if (!a->mult_work) {
10893b547af2SSatish Balay     k = PetscMax(a->m,a->n);
10902b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1091119a934aSSatish Balay   }
1092119a934aSSatish Balay   work = a->mult_work;
1093119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1094119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1095119a934aSSatish Balay     ncols = n*bs;
1096119a934aSSatish Balay     workt = work;
1097119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1098119a934aSSatish Balay       xb = x + bs*(*idx++);
1099119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1100119a934aSSatish Balay       workt += bs;
1101119a934aSSatish Balay     }
11021a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1103119a934aSSatish Balay     v += n*bs2;
1104119a934aSSatish Balay     z += bs;
1105119a934aSSatish Balay   }
1106c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1107c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11081a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1109bb42667fSSatish Balay   return 0;
1110bb42667fSSatish Balay }
1111bb42667fSSatish Balay 
11125615d1e5SSatish Balay #undef __FUNC__
11135615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1114f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1115f44d3a6dSSatish Balay {
1116f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1117f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1118c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1119f44d3a6dSSatish Balay 
1120c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1121c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1122c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1123f44d3a6dSSatish Balay 
1124f44d3a6dSSatish Balay   idx   = a->j;
1125f44d3a6dSSatish Balay   v     = a->a;
1126f44d3a6dSSatish Balay   ii    = a->i;
1127f44d3a6dSSatish Balay 
1128f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1129f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1130f44d3a6dSSatish Balay     sum  = y[i];
1131f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1132f44d3a6dSSatish Balay     z[i] = sum;
1133f44d3a6dSSatish Balay   }
1134c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1135c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1136c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1137f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1138f44d3a6dSSatish Balay   return 0;
1139f44d3a6dSSatish Balay }
1140f44d3a6dSSatish Balay 
11415615d1e5SSatish Balay #undef __FUNC__
11425615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1143f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1144f44d3a6dSSatish Balay {
1145f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1146f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1147f44d3a6dSSatish Balay   register Scalar x1,x2;
1148f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1149c16cb8f2SBarry Smith   int             j,n;
1150f44d3a6dSSatish Balay 
1151c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1152c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1153c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1154f44d3a6dSSatish Balay 
1155f44d3a6dSSatish Balay   idx   = a->j;
1156f44d3a6dSSatish Balay   v     = a->a;
1157f44d3a6dSSatish Balay   ii    = a->i;
1158f44d3a6dSSatish Balay 
1159f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1160f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1161f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1162f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1163f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1164f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1165f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1166f44d3a6dSSatish Balay       v += 4;
1167f44d3a6dSSatish Balay     }
1168f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1169f44d3a6dSSatish Balay     z += 2; y += 2;
1170f44d3a6dSSatish Balay   }
1171c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1172c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1173c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1174f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1175f44d3a6dSSatish Balay   return 0;
1176f44d3a6dSSatish Balay }
1177f44d3a6dSSatish Balay 
11785615d1e5SSatish Balay #undef __FUNC__
11795615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1180f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1181f44d3a6dSSatish Balay {
1182f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1183f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1184c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1185f44d3a6dSSatish Balay 
1186c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1187c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1188c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1189f44d3a6dSSatish Balay 
1190f44d3a6dSSatish Balay   idx   = a->j;
1191f44d3a6dSSatish Balay   v     = a->a;
1192f44d3a6dSSatish Balay   ii    = a->i;
1193f44d3a6dSSatish Balay 
1194f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1195f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1196f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1197f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1198f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1199f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1200f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1201f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1202f44d3a6dSSatish Balay       v += 9;
1203f44d3a6dSSatish Balay     }
1204f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1205f44d3a6dSSatish Balay     z += 3; y += 3;
1206f44d3a6dSSatish Balay   }
1207c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1208c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1209c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1210f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1211f44d3a6dSSatish Balay   return 0;
1212f44d3a6dSSatish Balay }
1213f44d3a6dSSatish Balay 
12145615d1e5SSatish Balay #undef __FUNC__
12155615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1216f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1217f44d3a6dSSatish Balay {
1218f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1219f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1220f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1221f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1222c16cb8f2SBarry Smith   int             j,n;
1223f44d3a6dSSatish Balay 
1224c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1225c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1226c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1227f44d3a6dSSatish Balay 
1228f44d3a6dSSatish Balay   idx   = a->j;
1229f44d3a6dSSatish Balay   v     = a->a;
1230f44d3a6dSSatish Balay   ii    = a->i;
1231f44d3a6dSSatish Balay 
1232f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1233f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1234f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1235f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1236f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1237f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1238f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1239f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1240f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1241f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1242f44d3a6dSSatish Balay       v += 16;
1243f44d3a6dSSatish Balay     }
1244f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1245f44d3a6dSSatish Balay     z += 4; y += 4;
1246f44d3a6dSSatish Balay   }
1247c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1248c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1249c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1250f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1251f44d3a6dSSatish Balay   return 0;
1252f44d3a6dSSatish Balay }
1253f44d3a6dSSatish Balay 
12545615d1e5SSatish Balay #undef __FUNC__
12555615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1256f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1257f44d3a6dSSatish Balay {
1258f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1259f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1260f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1261c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1262f44d3a6dSSatish Balay 
1263c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1264c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1265c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1266f44d3a6dSSatish Balay 
1267f44d3a6dSSatish Balay   idx   = a->j;
1268f44d3a6dSSatish Balay   v     = a->a;
1269f44d3a6dSSatish Balay   ii    = a->i;
1270f44d3a6dSSatish Balay 
1271f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1272f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1273f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1274f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1275f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1276f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1277f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1278f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1279f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1280f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1281f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1282f44d3a6dSSatish Balay       v += 25;
1283f44d3a6dSSatish Balay     }
1284f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1285f44d3a6dSSatish Balay     z += 5; y += 5;
1286f44d3a6dSSatish Balay   }
1287c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1288c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1289c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1290f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1291f44d3a6dSSatish Balay   return 0;
1292f44d3a6dSSatish Balay }
1293f44d3a6dSSatish Balay 
12945615d1e5SSatish Balay #undef __FUNC__
129548e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
129648e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
129748e9ddb2SSatish Balay {
129848e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
129948e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
130048e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
130148e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
130248e9ddb2SSatish Balay 
130348e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
130448e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
130548e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
130648e9ddb2SSatish Balay 
130748e9ddb2SSatish Balay   idx   = a->j;
130848e9ddb2SSatish Balay   v     = a->a;
130948e9ddb2SSatish Balay   ii    = a->i;
131048e9ddb2SSatish Balay 
131148e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
131248e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
131348e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
131448e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
131548e9ddb2SSatish Balay       xb = x + 7*(*idx++);
131648e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
131748e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
131848e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
131948e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
132048e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
132148e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
132248e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
132348e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
132448e9ddb2SSatish Balay       v += 49;
132548e9ddb2SSatish Balay     }
132648e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
132748e9ddb2SSatish Balay     z += 7; y += 7;
132848e9ddb2SSatish Balay   }
132948e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
133048e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
133148e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
133248e9ddb2SSatish Balay   PLogFlops(98*a->nz);
133348e9ddb2SSatish Balay   return 0;
133448e9ddb2SSatish Balay }
133548e9ddb2SSatish Balay 
133648e9ddb2SSatish Balay 
133748e9ddb2SSatish Balay #undef __FUNC__
13385615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1339f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1340f44d3a6dSSatish Balay {
1341f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1342f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1343f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1344f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1345f44d3a6dSSatish Balay 
1346f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1347f44d3a6dSSatish Balay 
1348c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1349c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1350f44d3a6dSSatish Balay 
1351f44d3a6dSSatish Balay   idx   = a->j;
1352f44d3a6dSSatish Balay   v     = a->a;
1353f44d3a6dSSatish Balay   ii    = a->i;
1354f44d3a6dSSatish Balay 
1355f44d3a6dSSatish Balay 
1356f44d3a6dSSatish Balay   if (!a->mult_work) {
1357f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1358f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1359f44d3a6dSSatish Balay   }
1360f44d3a6dSSatish Balay   work = a->mult_work;
1361f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1362f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1363f44d3a6dSSatish Balay     ncols = n*bs;
1364f44d3a6dSSatish Balay     workt = work;
1365f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1366f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1367f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1368f44d3a6dSSatish Balay       workt += bs;
1369f44d3a6dSSatish Balay     }
1370f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1371f44d3a6dSSatish Balay     v += n*bs2;
1372f44d3a6dSSatish Balay     z += bs;
1373f44d3a6dSSatish Balay   }
1374c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1375c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1376f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1377f44d3a6dSSatish Balay   return 0;
1378f44d3a6dSSatish Balay }
1379f44d3a6dSSatish Balay 
13805615d1e5SSatish Balay #undef __FUNC__
13815615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
13821a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1383bb42667fSSatish Balay {
1384bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13851a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1386bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1387bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
13889df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1389119a934aSSatish Balay 
1390119a934aSSatish Balay 
139190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
139290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1393bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1394bb42667fSSatish Balay 
1395119a934aSSatish Balay   idx   = a->j;
1396119a934aSSatish Balay   v     = a->a;
1397119a934aSSatish Balay   ii    = a->i;
1398119a934aSSatish Balay 
1399119a934aSSatish Balay   switch (bs) {
1400119a934aSSatish Balay   case 1:
1401119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1402119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1403119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1404119a934aSSatish Balay       ib = idx + ai[i];
1405119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1406bb1453f0SSatish Balay         rval    = ib[j];
1407bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1408119a934aSSatish Balay       }
1409119a934aSSatish Balay     }
1410119a934aSSatish Balay     break;
1411119a934aSSatish Balay   case 2:
1412119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1413119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1414119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1415119a934aSSatish Balay       ib = idx + ai[i];
1416119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1417119a934aSSatish Balay         rval      = ib[j]*2;
1418bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1419bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1420119a934aSSatish Balay         v += 4;
1421119a934aSSatish Balay       }
1422119a934aSSatish Balay     }
1423119a934aSSatish Balay     break;
1424119a934aSSatish Balay   case 3:
1425119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1426119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1427119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1428119a934aSSatish Balay       ib = idx + ai[i];
1429119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1430119a934aSSatish Balay         rval      = ib[j]*3;
1431bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1432bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1433bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1434119a934aSSatish Balay         v += 9;
1435119a934aSSatish Balay       }
1436119a934aSSatish Balay     }
1437119a934aSSatish Balay     break;
1438119a934aSSatish Balay   case 4:
1439119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1440119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1441119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1442119a934aSSatish Balay       ib = idx + ai[i];
1443119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1444119a934aSSatish Balay         rval      = ib[j]*4;
1445bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1446bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1447bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1448bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1449119a934aSSatish Balay         v += 16;
1450119a934aSSatish Balay       }
1451119a934aSSatish Balay     }
1452119a934aSSatish Balay     break;
1453119a934aSSatish Balay   case 5:
1454119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1455119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1456119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1457119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1458119a934aSSatish Balay       ib = idx + ai[i];
1459119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1460119a934aSSatish Balay         rval      = ib[j]*5;
1461bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1462bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1463bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1464bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1465bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1466119a934aSSatish Balay         v += 25;
1467119a934aSSatish Balay       }
1468119a934aSSatish Balay     }
1469119a934aSSatish Balay     break;
1470119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1471119a934aSSatish Balay     default: {
1472119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1473119a934aSSatish Balay       if (!a->mult_work) {
14743b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1475bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1476119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1477119a934aSSatish Balay       }
1478119a934aSSatish Balay       work = a->mult_work;
1479119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1480119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1481119a934aSSatish Balay         ncols = n*bs;
1482119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1483119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1484119a934aSSatish Balay         v += n*bs2;
1485119a934aSSatish Balay         x += bs;
1486119a934aSSatish Balay         workt = work;
1487119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1488119a934aSSatish Balay           zb = z + bs*(*idx++);
1489bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1490119a934aSSatish Balay           workt += bs;
1491119a934aSSatish Balay         }
1492119a934aSSatish Balay       }
1493119a934aSSatish Balay     }
1494119a934aSSatish Balay   }
14950419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
14960419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1497faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1498faf6580fSSatish Balay   return 0;
1499faf6580fSSatish Balay }
15001c351548SSatish Balay 
15015615d1e5SSatish Balay #undef __FUNC__
15025615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1503faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1504faf6580fSSatish Balay {
1505faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1506faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1507faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1508faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1509faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1510faf6580fSSatish Balay 
1511faf6580fSSatish Balay 
1512faf6580fSSatish Balay 
151390f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
151490f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1515faf6580fSSatish Balay 
1516648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1517648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1518648c1d95SSatish Balay 
1519faf6580fSSatish Balay 
1520faf6580fSSatish Balay   idx   = a->j;
1521faf6580fSSatish Balay   v     = a->a;
1522faf6580fSSatish Balay   ii    = a->i;
1523faf6580fSSatish Balay 
1524faf6580fSSatish Balay   switch (bs) {
1525faf6580fSSatish Balay   case 1:
1526faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1527faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1528faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1529faf6580fSSatish Balay       ib = idx + ai[i];
1530faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1531faf6580fSSatish Balay         rval    = ib[j];
1532faf6580fSSatish Balay         z[rval] += *v++ * x1;
1533faf6580fSSatish Balay       }
1534faf6580fSSatish Balay     }
1535faf6580fSSatish Balay     break;
1536faf6580fSSatish Balay   case 2:
1537faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1538faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1539faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1540faf6580fSSatish Balay       ib = idx + ai[i];
1541faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1542faf6580fSSatish Balay         rval      = ib[j]*2;
1543faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1544faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1545faf6580fSSatish Balay         v += 4;
1546faf6580fSSatish Balay       }
1547faf6580fSSatish Balay     }
1548faf6580fSSatish Balay     break;
1549faf6580fSSatish Balay   case 3:
1550faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1551faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1552faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1553faf6580fSSatish Balay       ib = idx + ai[i];
1554faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1555faf6580fSSatish Balay         rval      = ib[j]*3;
1556faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1557faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1558faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1559faf6580fSSatish Balay         v += 9;
1560faf6580fSSatish Balay       }
1561faf6580fSSatish Balay     }
1562faf6580fSSatish Balay     break;
1563faf6580fSSatish Balay   case 4:
1564faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1565faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1566faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1567faf6580fSSatish Balay       ib = idx + ai[i];
1568faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1569faf6580fSSatish Balay         rval      = ib[j]*4;
1570faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1571faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1572faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1573faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1574faf6580fSSatish Balay         v += 16;
1575faf6580fSSatish Balay       }
1576faf6580fSSatish Balay     }
1577faf6580fSSatish Balay     break;
1578faf6580fSSatish Balay   case 5:
1579faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1580faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1581faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1582faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1583faf6580fSSatish Balay       ib = idx + ai[i];
1584faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1585faf6580fSSatish Balay         rval      = ib[j]*5;
1586faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1587faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1588faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1589faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1590faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1591faf6580fSSatish Balay         v += 25;
1592faf6580fSSatish Balay       }
1593faf6580fSSatish Balay     }
1594faf6580fSSatish Balay     break;
1595faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1596faf6580fSSatish Balay     default: {
1597faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1598faf6580fSSatish Balay       if (!a->mult_work) {
1599faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1600faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1601faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1602faf6580fSSatish Balay       }
1603faf6580fSSatish Balay       work = a->mult_work;
1604faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1605faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1606faf6580fSSatish Balay         ncols = n*bs;
1607faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1608faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1609faf6580fSSatish Balay         v += n*bs2;
1610faf6580fSSatish Balay         x += bs;
1611faf6580fSSatish Balay         workt = work;
1612faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1613faf6580fSSatish Balay           zb = z + bs*(*idx++);
1614faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1615faf6580fSSatish Balay           workt += bs;
1616faf6580fSSatish Balay         }
1617faf6580fSSatish Balay       }
1618faf6580fSSatish Balay     }
1619faf6580fSSatish Balay   }
1620faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1621faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1622faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16232593348eSBarry Smith   return 0;
16242593348eSBarry Smith }
16252593348eSBarry Smith 
16265615d1e5SSatish Balay #undef __FUNC__
16275615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ"
16284e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16292593348eSBarry Smith {
1630b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16314e220ebcSLois Curfman McInnes 
16324e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16334e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16344e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16354e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16364e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16374e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16384e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16394e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16404e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16414e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16424e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16434e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16444e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16454e220ebcSLois Curfman McInnes   if (A->factor) {
16464e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16474e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16484e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16494e220ebcSLois Curfman McInnes   } else {
16504e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16514e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16524e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16534e220ebcSLois Curfman McInnes   }
16542593348eSBarry Smith   return 0;
16552593348eSBarry Smith }
16562593348eSBarry Smith 
16575615d1e5SSatish Balay #undef __FUNC__
16585615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
165991d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
166091d316f6SSatish Balay {
166191d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
166291d316f6SSatish Balay 
1663e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
166491d316f6SSatish Balay 
166591d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
166691d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1667a7c10996SSatish Balay       (a->nz != b->nz)) {
166891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
166991d316f6SSatish Balay   }
167091d316f6SSatish Balay 
167191d316f6SSatish Balay   /* if the a->i are the same */
167291d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
167391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
167491d316f6SSatish Balay   }
167591d316f6SSatish Balay 
167691d316f6SSatish Balay   /* if a->j are the same */
167791d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
167891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
167991d316f6SSatish Balay   }
168091d316f6SSatish Balay 
168191d316f6SSatish Balay   /* if a->a are the same */
168291d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
168391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168491d316f6SSatish Balay   }
168591d316f6SSatish Balay   *flg = PETSC_TRUE;
168691d316f6SSatish Balay   return 0;
168791d316f6SSatish Balay 
168891d316f6SSatish Balay }
168991d316f6SSatish Balay 
16905615d1e5SSatish Balay #undef __FUNC__
16915615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
169291d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
169391d316f6SSatish Balay {
169491d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16957e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
169617e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
169717e48fc4SSatish Balay 
16980513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
169917e48fc4SSatish Balay   bs   = a->bs;
170017e48fc4SSatish Balay   aa   = a->a;
170117e48fc4SSatish Balay   ai   = a->i;
170217e48fc4SSatish Balay   aj   = a->j;
170317e48fc4SSatish Balay   ambs = a->mbs;
17049df24120SSatish Balay   bs2  = a->bs2;
170591d316f6SSatish Balay 
170691d316f6SSatish Balay   VecSet(&zero,v);
170790f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1708e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
170917e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
171017e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
171117e48fc4SSatish Balay       if (aj[j] == i) {
171217e48fc4SSatish Balay         row  = i*bs;
17137e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17147e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
171591d316f6SSatish Balay         break;
171691d316f6SSatish Balay       }
171791d316f6SSatish Balay     }
171891d316f6SSatish Balay   }
171991d316f6SSatish Balay   return 0;
172091d316f6SSatish Balay }
172191d316f6SSatish Balay 
17225615d1e5SSatish Balay #undef __FUNC__
17235615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17249867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17259867e207SSatish Balay {
17269867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17279867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17287e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17299867e207SSatish Balay 
17309867e207SSatish Balay   ai  = a->i;
17319867e207SSatish Balay   aj  = a->j;
17329867e207SSatish Balay   aa  = a->a;
17339867e207SSatish Balay   m   = a->m;
17349867e207SSatish Balay   n   = a->n;
17359867e207SSatish Balay   bs  = a->bs;
17369867e207SSatish Balay   mbs = a->mbs;
17379df24120SSatish Balay   bs2 = a->bs2;
17389867e207SSatish Balay   if (ll) {
173990f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1740e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17419867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17429867e207SSatish Balay       M  = ai[i+1] - ai[i];
17439867e207SSatish Balay       li = l + i*bs;
17447e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17459867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17467e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17479867e207SSatish Balay           (*v++) *= li[k%bs];
17489867e207SSatish Balay         }
17499867e207SSatish Balay       }
17509867e207SSatish Balay     }
17519867e207SSatish Balay   }
17529867e207SSatish Balay 
17539867e207SSatish Balay   if (rr) {
175490f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1755e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17569867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17579867e207SSatish Balay       M  = ai[i+1] - ai[i];
17587e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17599867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17609867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17619867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17629867e207SSatish Balay           x = ri[k];
17639867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17649867e207SSatish Balay         }
17659867e207SSatish Balay       }
17669867e207SSatish Balay     }
17679867e207SSatish Balay   }
17689867e207SSatish Balay   return 0;
17699867e207SSatish Balay }
17709867e207SSatish Balay 
17719867e207SSatish Balay 
1772b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1773b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17742a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1775736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1776736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17771a6a6d98SBarry Smith 
17781a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17791a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17801a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
17811a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
17821a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
17831a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
178448e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
17851a6a6d98SBarry Smith 
17867fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
17877fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
17887fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
17897fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
17907fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
17917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
17922593348eSBarry Smith 
17935615d1e5SSatish Balay #undef __FUNC__
17945615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
1795b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
17962593348eSBarry Smith {
1797b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17982593348eSBarry Smith   Scalar      *v = a->a;
17992593348eSBarry Smith   double      sum = 0.0;
18009df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18012593348eSBarry Smith 
18022593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18030419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18042593348eSBarry Smith #if defined(PETSC_COMPLEX)
18052593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18062593348eSBarry Smith #else
18072593348eSBarry Smith       sum += (*v)*(*v); v++;
18082593348eSBarry Smith #endif
18092593348eSBarry Smith     }
18102593348eSBarry Smith     *norm = sqrt(sum);
18112593348eSBarry Smith   }
18122593348eSBarry Smith   else {
1813e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18142593348eSBarry Smith   }
18152593348eSBarry Smith   return 0;
18162593348eSBarry Smith }
18172593348eSBarry Smith 
18182593348eSBarry Smith /*
18192593348eSBarry Smith      note: This can only work for identity for row and col. It would
18202593348eSBarry Smith    be good to check this and otherwise generate an error.
18212593348eSBarry Smith */
18225615d1e5SSatish Balay #undef __FUNC__
18235615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
1824b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18252593348eSBarry Smith {
1826b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18272593348eSBarry Smith   Mat         outA;
1828de6a44a3SBarry Smith   int         ierr;
18292593348eSBarry Smith 
1830e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18312593348eSBarry Smith 
18322593348eSBarry Smith   outA          = inA;
18332593348eSBarry Smith   inA->factor   = FACTOR_LU;
18342593348eSBarry Smith   a->row        = row;
18352593348eSBarry Smith   a->col        = col;
18362593348eSBarry Smith 
1837de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
18382593348eSBarry Smith 
18392593348eSBarry Smith   if (!a->diag) {
1840b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18412593348eSBarry Smith   }
18427fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18432593348eSBarry Smith   return 0;
18442593348eSBarry Smith }
18452593348eSBarry Smith 
18465615d1e5SSatish Balay #undef __FUNC__
18475615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
1848b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18492593348eSBarry Smith {
1850b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18519df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1852b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1853b6490206SBarry Smith   PLogFlops(totalnz);
18542593348eSBarry Smith   return 0;
18552593348eSBarry Smith }
18562593348eSBarry Smith 
18575615d1e5SSatish Balay #undef __FUNC__
18585615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18597e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18607e67e3f9SSatish Balay {
18617e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18627e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1863a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18649df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18657e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18667e67e3f9SSatish Balay 
18677e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
18687e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1869e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1870e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1871a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
18727e67e3f9SSatish Balay     nrow = ailen[brow];
18737e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1874e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1875e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1876a7c10996SSatish Balay       col  = in[l] ;
18777e67e3f9SSatish Balay       bcol = col/bs;
18787e67e3f9SSatish Balay       cidx = col%bs;
18797e67e3f9SSatish Balay       ridx = row%bs;
18807e67e3f9SSatish Balay       high = nrow;
18817e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
18827e67e3f9SSatish Balay       while (high-low > 5) {
18837e67e3f9SSatish Balay         t = (low+high)/2;
18847e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
18857e67e3f9SSatish Balay         else             low  = t;
18867e67e3f9SSatish Balay       }
18877e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
18887e67e3f9SSatish Balay         if (rp[i] > bcol) break;
18897e67e3f9SSatish Balay         if (rp[i] == bcol) {
18907e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18917e67e3f9SSatish Balay           goto finished;
18927e67e3f9SSatish Balay         }
18937e67e3f9SSatish Balay       }
18947e67e3f9SSatish Balay       *v++ = zero;
18957e67e3f9SSatish Balay       finished:;
18967e67e3f9SSatish Balay     }
18977e67e3f9SSatish Balay   }
18987e67e3f9SSatish Balay   return 0;
18997e67e3f9SSatish Balay }
19007e67e3f9SSatish Balay 
19015615d1e5SSatish Balay #undef __FUNC__
19025615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
19035a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19045a838052SSatish Balay {
19055a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19065a838052SSatish Balay   *bs = baij->bs;
19075a838052SSatish Balay   return 0;
19085a838052SSatish Balay }
19095a838052SSatish Balay 
1910d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19115615d1e5SSatish Balay #undef __FUNC__
19125615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1913d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1914d9b7c43dSSatish Balay {
1915d9b7c43dSSatish Balay   int i,row;
1916d9b7c43dSSatish Balay   row = idx[0];
1917d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1918d9b7c43dSSatish Balay 
1919d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1920d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1921d9b7c43dSSatish Balay   }
1922d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1923d9b7c43dSSatish Balay   return 0;
1924d9b7c43dSSatish Balay }
1925d9b7c43dSSatish Balay 
19265615d1e5SSatish Balay #undef __FUNC__
19275615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
1928d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1929d9b7c43dSSatish Balay {
1930d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1931d9b7c43dSSatish Balay   IS          is_local;
1932d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1933d9b7c43dSSatish Balay   PetscTruth  flg;
1934d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1935d9b7c43dSSatish Balay 
1936d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1937d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1938d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1939537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1940d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1941d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1942d9b7c43dSSatish Balay 
1943d9b7c43dSSatish Balay   i = 0;
1944d9b7c43dSSatish Balay   while (i < is_n) {
1945e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1946d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1947d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1948d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1949d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1950d9b7c43dSSatish Balay       i += bs;
1951d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1952d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1953d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1954d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1955d9b7c43dSSatish Balay         aa[0] = zero;
1956d9b7c43dSSatish Balay         aa+=bs;
1957d9b7c43dSSatish Balay       }
1958d9b7c43dSSatish Balay       i++;
1959d9b7c43dSSatish Balay     }
1960d9b7c43dSSatish Balay   }
1961d9b7c43dSSatish Balay   if (diag) {
1962d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1963d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1964d9b7c43dSSatish Balay     }
1965d9b7c43dSSatish Balay   }
1966d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1967d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1968d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
19699a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1970d9b7c43dSSatish Balay 
1971d9b7c43dSSatish Balay   return 0;
1972d9b7c43dSSatish Balay }
19731c351548SSatish Balay 
19745615d1e5SSatish Balay #undef __FUNC__
19755615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1976ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1977ba4ca20aSSatish Balay {
1978ba4ca20aSSatish Balay   static int called = 0;
1979ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1980ba4ca20aSSatish Balay 
1981ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1982ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1983ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1984ba4ca20aSSatish Balay   return 0;
1985ba4ca20aSSatish Balay }
1986d9b7c43dSSatish Balay 
19872593348eSBarry Smith /* -------------------------------------------------------------------*/
1988cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
19899867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1990f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1991faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
19921a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1993b6490206SBarry Smith        0,0,
1994de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1995b6490206SBarry Smith        0,
1996f2501298SSatish Balay        MatTranspose_SeqBAIJ,
199717e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
19989867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1999584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2000b6490206SBarry Smith        0,
2001d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20027fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2003b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2004de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2005d402145bSBarry Smith        0,0,
2006b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2007b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2008af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20097e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2010ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20113b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20123b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
201392c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
201492c4ed94SBarry Smith        0,0,0,0,0,0,
201592c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20162593348eSBarry Smith 
20175615d1e5SSatish Balay #undef __FUNC__
20185615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20192593348eSBarry Smith /*@C
202044cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
202144cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
202244cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20232bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20242bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20252593348eSBarry Smith 
20262593348eSBarry Smith    Input Parameters:
20272593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
2028b6490206SBarry Smith .  bs - size of block
20292593348eSBarry Smith .  m - number of rows
20302593348eSBarry Smith .  n - number of columns
2031b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20322bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20332bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20342593348eSBarry Smith 
20352593348eSBarry Smith    Output Parameter:
20362593348eSBarry Smith .  A - the matrix
20372593348eSBarry Smith 
20380513a670SBarry Smith    Options Database Keys:
20390513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20400513a670SBarry Smith $                     block calculations (much solver)
20410513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20420513a670SBarry Smith 
20432593348eSBarry Smith    Notes:
204444cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20452593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
204644cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20472593348eSBarry Smith 
20482593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20492593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20502593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20516da5968aSLois Curfman McInnes    matrices.
20522593348eSBarry Smith 
205344cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20542593348eSBarry Smith @*/
2055b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20562593348eSBarry Smith {
20572593348eSBarry Smith   Mat         B;
2058b6490206SBarry Smith   Mat_SeqBAIJ *b;
20593b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20603b2fbd54SBarry Smith 
20613b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2062e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2063b6490206SBarry Smith 
20640513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20650513a670SBarry Smith 
2066f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2067e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
20682593348eSBarry Smith 
20692593348eSBarry Smith   *A = 0;
2070b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
20712593348eSBarry Smith   PLogObjectCreate(B);
2072b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
207344cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
20742593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
20751a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
20761a6a6d98SBarry Smith   if (!flg) {
20777fc0212eSBarry Smith     switch (bs) {
20787fc0212eSBarry Smith     case 1:
20797fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20801a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
208139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2082f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
20837fc0212eSBarry Smith       break;
20844eeb42bcSBarry Smith     case 2:
20854eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
20861a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
208739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2088f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
20896d84be18SBarry Smith       break;
2090f327dd97SBarry Smith     case 3:
2091f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
20921a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
209339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2094f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
20954eeb42bcSBarry Smith       break;
20966d84be18SBarry Smith     case 4:
20976d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
20981a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
209939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2100f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21016d84be18SBarry Smith       break;
21026d84be18SBarry Smith     case 5:
21036d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21041a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
210539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2106f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21076d84be18SBarry Smith       break;
210848e9ddb2SSatish Balay     case 7:
210948e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
211048e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
211148e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
211248e9ddb2SSatish Balay       break;
21137fc0212eSBarry Smith     }
21141a6a6d98SBarry Smith   }
2115b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2116b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21172593348eSBarry Smith   B->factor           = 0;
21182593348eSBarry Smith   B->lupivotthreshold = 1.0;
211990f02eecSBarry Smith   B->mapping          = 0;
21202593348eSBarry Smith   b->row              = 0;
21212593348eSBarry Smith   b->col              = 0;
21222593348eSBarry Smith   b->reallocs         = 0;
21232593348eSBarry Smith 
212444cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
212544cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2126b6490206SBarry Smith   b->mbs     = mbs;
2127f2501298SSatish Balay   b->nbs     = nbs;
2128b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21292593348eSBarry Smith   if (nnz == PETSC_NULL) {
2130119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21312593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2132b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2133b6490206SBarry Smith     nz = nz*mbs;
21342593348eSBarry Smith   }
21352593348eSBarry Smith   else {
21362593348eSBarry Smith     nz = 0;
2137b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21382593348eSBarry Smith   }
21392593348eSBarry Smith 
21402593348eSBarry Smith   /* allocate the matrix space */
21417e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21422593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21437e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21447e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21452593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21462593348eSBarry Smith   b->i  = b->j + nz;
21472593348eSBarry Smith   b->singlemalloc = 1;
21482593348eSBarry Smith 
2149de6a44a3SBarry Smith   b->i[0] = 0;
2150b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21512593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21522593348eSBarry Smith   }
21532593348eSBarry Smith 
2154b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2155b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2156b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2157b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21582593348eSBarry Smith 
2159b6490206SBarry Smith   b->bs               = bs;
21609df24120SSatish Balay   b->bs2              = bs2;
2161b6490206SBarry Smith   b->mbs              = mbs;
21622593348eSBarry Smith   b->nz               = 0;
216318e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21642593348eSBarry Smith   b->sorted           = 0;
21652593348eSBarry Smith   b->roworiented      = 1;
21662593348eSBarry Smith   b->nonew            = 0;
21672593348eSBarry Smith   b->diag             = 0;
21682593348eSBarry Smith   b->solve_work       = 0;
2169de6a44a3SBarry Smith   b->mult_work        = 0;
21702593348eSBarry Smith   b->spptr            = 0;
21714e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
21724e220ebcSLois Curfman McInnes 
21732593348eSBarry Smith   *A = B;
21742593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
21752593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
21762593348eSBarry Smith   return 0;
21772593348eSBarry Smith }
21782593348eSBarry Smith 
21795615d1e5SSatish Balay #undef __FUNC__
21805615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2181b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
21822593348eSBarry Smith {
21832593348eSBarry Smith   Mat         C;
2184b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
21859df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2186de6a44a3SBarry Smith 
2187e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
21882593348eSBarry Smith 
21892593348eSBarry Smith   *B = 0;
2190b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
21912593348eSBarry Smith   PLogObjectCreate(C);
2192b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
21932593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2194b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2195b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
21962593348eSBarry Smith   C->factor     = A->factor;
21972593348eSBarry Smith   c->row        = 0;
21982593348eSBarry Smith   c->col        = 0;
21992593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22002593348eSBarry Smith 
220144cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
220244cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
220344cd7ae7SLois Curfman McInnes   C->M          = a->m;
220444cd7ae7SLois Curfman McInnes   C->N          = a->n;
220544cd7ae7SLois Curfman McInnes 
2206b6490206SBarry Smith   c->bs         = a->bs;
22079df24120SSatish Balay   c->bs2        = a->bs2;
2208b6490206SBarry Smith   c->mbs        = a->mbs;
2209b6490206SBarry Smith   c->nbs        = a->nbs;
22102593348eSBarry Smith 
2211b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2212b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2213b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22142593348eSBarry Smith     c->imax[i] = a->imax[i];
22152593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22162593348eSBarry Smith   }
22172593348eSBarry Smith 
22182593348eSBarry Smith   /* allocate the matrix space */
22192593348eSBarry Smith   c->singlemalloc = 1;
22207e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22212593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22227e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2223de6a44a3SBarry Smith   c->i  = c->j + nz;
2224b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2225b6490206SBarry Smith   if (mbs > 0) {
2226de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22272593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22287e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22292593348eSBarry Smith     }
22302593348eSBarry Smith   }
22312593348eSBarry Smith 
2232b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
22332593348eSBarry Smith   c->sorted      = a->sorted;
22342593348eSBarry Smith   c->roworiented = a->roworiented;
22352593348eSBarry Smith   c->nonew       = a->nonew;
22362593348eSBarry Smith 
22372593348eSBarry Smith   if (a->diag) {
2238b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2239b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2240b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22412593348eSBarry Smith       c->diag[i] = a->diag[i];
22422593348eSBarry Smith     }
22432593348eSBarry Smith   }
22442593348eSBarry Smith   else c->diag          = 0;
22452593348eSBarry Smith   c->nz                 = a->nz;
22462593348eSBarry Smith   c->maxnz              = a->maxnz;
22472593348eSBarry Smith   c->solve_work         = 0;
22482593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22497fc0212eSBarry Smith   c->mult_work          = 0;
22502593348eSBarry Smith   *B = C;
22512593348eSBarry Smith   return 0;
22522593348eSBarry Smith }
22532593348eSBarry Smith 
22545615d1e5SSatish Balay #undef __FUNC__
22555615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
225619bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22572593348eSBarry Smith {
2258b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22592593348eSBarry Smith   Mat          B;
2260de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2261b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
226235aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2263a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2264b6490206SBarry Smith   Scalar       *aa;
226519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22662593348eSBarry Smith 
22671a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2268de6a44a3SBarry Smith   bs2  = bs*bs;
2269b6490206SBarry Smith 
22702593348eSBarry Smith   MPI_Comm_size(comm,&size);
2271e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
227290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
227377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2274e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
22752593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
22762593348eSBarry Smith 
2277e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
227835aab85fSBarry Smith 
227935aab85fSBarry Smith   /*
228035aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
228135aab85fSBarry Smith     divisible by the blocksize
228235aab85fSBarry Smith   */
2283b6490206SBarry Smith   mbs        = M/bs;
228435aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
228535aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
228635aab85fSBarry Smith   else                  mbs++;
228735aab85fSBarry Smith   if (extra_rows) {
2288537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
228935aab85fSBarry Smith   }
2290b6490206SBarry Smith 
22912593348eSBarry Smith   /* read in row lengths */
229235aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
229377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
229435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
22952593348eSBarry Smith 
2296b6490206SBarry Smith   /* read in column indices */
229735aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
229877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
229935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2300b6490206SBarry Smith 
2301b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2302b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2303b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
230435aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
230535aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
230635aab85fSBarry Smith   masked = mask + mbs;
2307b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2308b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
230935aab85fSBarry Smith     nmask = 0;
2310b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2311b6490206SBarry Smith       kmax = rowlengths[rowcount];
2312b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
231335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
231435aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2315b6490206SBarry Smith       }
2316b6490206SBarry Smith       rowcount++;
2317b6490206SBarry Smith     }
231835aab85fSBarry Smith     browlengths[i] += nmask;
231935aab85fSBarry Smith     /* zero out the mask elements we set */
232035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2321b6490206SBarry Smith   }
2322b6490206SBarry Smith 
23232593348eSBarry Smith   /* create our matrix */
232435aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
232535aab85fSBarry Smith          CHKERRQ(ierr);
23262593348eSBarry Smith   B = *A;
2327b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23282593348eSBarry Smith 
23292593348eSBarry Smith   /* set matrix "i" values */
2330de6a44a3SBarry Smith   a->i[0] = 0;
2331b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2332b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2333b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23342593348eSBarry Smith   }
2335b6490206SBarry Smith   a->nz         = 0;
2336b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23372593348eSBarry Smith 
2338b6490206SBarry Smith   /* read in nonzero values */
233935aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
234077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
234135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2342b6490206SBarry Smith 
2343b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2344b6490206SBarry Smith   nzcount = 0; jcount = 0;
2345b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2346b6490206SBarry Smith     nzcountb = nzcount;
234735aab85fSBarry Smith     nmask    = 0;
2348b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2349b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2350b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
235135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
235235aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2353b6490206SBarry Smith       }
2354b6490206SBarry Smith       rowcount++;
2355b6490206SBarry Smith     }
2356de6a44a3SBarry Smith     /* sort the masked values */
235777c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2358de6a44a3SBarry Smith 
2359b6490206SBarry Smith     /* set "j" values into matrix */
2360b6490206SBarry Smith     maskcount = 1;
236135aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
236235aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2363de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2364b6490206SBarry Smith     }
2365b6490206SBarry Smith     /* set "a" values into matrix */
2366de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2367b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2368b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2369b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2370de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2371de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2372de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2373de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2374b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2375b6490206SBarry Smith       }
2376b6490206SBarry Smith     }
237735aab85fSBarry Smith     /* zero out the mask elements we set */
237835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2379b6490206SBarry Smith   }
2380e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2381b6490206SBarry Smith 
2382b6490206SBarry Smith   PetscFree(rowlengths);
2383b6490206SBarry Smith   PetscFree(browlengths);
2384b6490206SBarry Smith   PetscFree(aa);
2385b6490206SBarry Smith   PetscFree(jj);
2386b6490206SBarry Smith   PetscFree(mask);
2387b6490206SBarry Smith 
2388b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2389de6a44a3SBarry Smith 
2390de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2391de6a44a3SBarry Smith   if (flg) {
239219bcc07fSBarry Smith     Viewer tviewer;
239319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2394639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
239519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
239619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2397de6a44a3SBarry Smith   }
2398de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2399de6a44a3SBarry Smith   if (flg) {
240019bcc07fSBarry Smith     Viewer tviewer;
240119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2402639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
240319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
240419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2405de6a44a3SBarry Smith   }
2406de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2407de6a44a3SBarry Smith   if (flg) {
240819bcc07fSBarry Smith     Viewer tviewer;
240919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
241019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
241119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2412de6a44a3SBarry Smith   }
2413de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2414de6a44a3SBarry Smith   if (flg) {
241519bcc07fSBarry Smith     Viewer tviewer;
241619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2417639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
241819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
241919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2420de6a44a3SBarry Smith   }
2421de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2422de6a44a3SBarry Smith   if (flg) {
242319bcc07fSBarry Smith     Viewer tviewer;
242419bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
242519bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
242619bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
242719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2428de6a44a3SBarry Smith   }
24292593348eSBarry Smith   return 0;
24302593348eSBarry Smith }
24312593348eSBarry Smith 
24322593348eSBarry Smith 
24332593348eSBarry Smith 
2434