xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0effe34fefb466e61d8d01451db974785823aa5d)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*0effe34fSLois Curfman McInnes static char vcid[] = "$Id: baij.c,v 1.110 1997/08/22 15:14:29 bsmith Exp curfman $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
93369ce9aSBarry Smith 
103369ce9aSBarry Smith #include "pinclude/pviewer.h"
113369ce9aSBarry Smith #include "sys.h"
1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
131a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
141a6a6d98SBarry Smith #include "src/inline/spops.h"
152593348eSBarry Smith #include "petsc.h"
163b547af2SSatish Balay 
172593348eSBarry Smith 
18de6a44a3SBarry Smith /*
19de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
20de6a44a3SBarry Smith */
21de6a44a3SBarry Smith 
225615d1e5SSatish Balay #undef __FUNC__
235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
25de6a44a3SBarry Smith {
26de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
277fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
28de6a44a3SBarry Smith 
29de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
317fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
32de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33de6a44a3SBarry Smith       if (a->j[j] == i) {
34de6a44a3SBarry Smith         diag[i] = j;
35de6a44a3SBarry Smith         break;
36de6a44a3SBarry Smith       }
37de6a44a3SBarry Smith     }
38de6a44a3SBarry Smith   }
39de6a44a3SBarry Smith   a->diag = diag;
40de6a44a3SBarry Smith   return 0;
41de6a44a3SBarry Smith }
422593348eSBarry Smith 
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
72d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
93d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
11966963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
13466963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
151d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
175766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
180766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18344cd7ae7SLois Curfman McInnes #else
18444cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18544cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18644cd7ae7SLois Curfman McInnes #endif
18744cd7ae7SLois Curfman McInnes           }
18844cd7ae7SLois Curfman McInnes         }
18944cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19044cd7ae7SLois Curfman McInnes       }
19144cd7ae7SLois Curfman McInnes     }
19244cd7ae7SLois Curfman McInnes   }
1932593348eSBarry Smith   else {
194b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
195b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
196b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
197b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
200766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
20188685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2027e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
204766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
205766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
206766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
207766eeae4SLois Curfman McInnes           }
20888685aaeSLois Curfman McInnes           else {
2097e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
21088685aaeSLois Curfman McInnes           }
21188685aaeSLois Curfman McInnes #else
2127e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
21388685aaeSLois Curfman McInnes #endif
2142593348eSBarry Smith           }
2152593348eSBarry Smith         }
2162593348eSBarry Smith         fprintf(fd,"\n");
2172593348eSBarry Smith       }
2182593348eSBarry Smith     }
219b6490206SBarry Smith   }
2202593348eSBarry Smith   fflush(fd);
2212593348eSBarry Smith   return 0;
2222593348eSBarry Smith }
2232593348eSBarry Smith 
2245615d1e5SSatish Balay #undef __FUNC__
225d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw"
2263270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2273270192aSSatish Balay {
2283270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2293270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2303270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2313270192aSSatish Balay   Scalar       *aa;
2323270192aSSatish Balay   Draw         draw;
2333270192aSSatish Balay   DrawButton   button;
2343270192aSSatish Balay   PetscTruth   isnull;
2353270192aSSatish Balay 
2363270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2373270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2383270192aSSatish Balay 
2393270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2403270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2413270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2423270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2433270192aSSatish Balay   color = DRAW_BLUE;
2443270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2453270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2463270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2473270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2483270192aSSatish Balay       aa = a->a + j*bs2;
2493270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2503270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2513270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
252b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2533270192aSSatish Balay         }
2543270192aSSatish Balay       }
2553270192aSSatish Balay     }
2563270192aSSatish Balay   }
2573270192aSSatish Balay   color = DRAW_CYAN;
2583270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2593270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2603270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2613270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2623270192aSSatish Balay       aa = a->a + j*bs2;
2633270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2643270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2653270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
266b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2673270192aSSatish Balay         }
2683270192aSSatish Balay       }
2693270192aSSatish Balay     }
2703270192aSSatish Balay   }
2713270192aSSatish Balay 
2723270192aSSatish Balay   color = DRAW_RED;
2733270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2743270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2753270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2763270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2773270192aSSatish Balay       aa = a->a + j*bs2;
2783270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2793270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2803270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
281b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2823270192aSSatish Balay         }
2833270192aSSatish Balay       }
2843270192aSSatish Balay     }
2853270192aSSatish Balay   }
2863270192aSSatish Balay 
2873270192aSSatish Balay   DrawFlush(draw);
2883270192aSSatish Balay   DrawGetPause(draw,&pause);
2893270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2903270192aSSatish Balay 
2913270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2923270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2933270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2943270192aSSatish Balay     DrawClear(draw);
2953270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2963270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2973270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2983270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2993270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
3003270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
3013270192aSSatish Balay     w *= scale; h *= scale;
3023270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
3033270192aSSatish Balay     color = DRAW_BLUE;
3043270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3053270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3063270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3073270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3083270192aSSatish Balay         aa = a->a + j*bs2;
3093270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3103270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3113270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
312b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3133270192aSSatish Balay           }
3143270192aSSatish Balay         }
3153270192aSSatish Balay       }
3163270192aSSatish Balay     }
3173270192aSSatish Balay     color = DRAW_CYAN;
3183270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3193270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3203270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3213270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3223270192aSSatish Balay         aa = a->a + j*bs2;
3233270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3243270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3253270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
326b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3273270192aSSatish Balay           }
3283270192aSSatish Balay         }
3293270192aSSatish Balay       }
3303270192aSSatish Balay     }
3313270192aSSatish Balay 
3323270192aSSatish Balay     color = DRAW_RED;
3333270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3343270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3353270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3363270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3373270192aSSatish Balay         aa = a->a + j*bs2;
3383270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3393270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3403270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
341b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3423270192aSSatish Balay           }
3433270192aSSatish Balay         }
3443270192aSSatish Balay       }
3453270192aSSatish Balay     }
3463270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3473270192aSSatish Balay   }
3483270192aSSatish Balay   return 0;
3493270192aSSatish Balay }
3503270192aSSatish Balay 
3515615d1e5SSatish Balay #undef __FUNC__
352d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ"
3538f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3542593348eSBarry Smith {
3552593348eSBarry Smith   Mat         A = (Mat) obj;
35619bcc07fSBarry Smith   ViewerType  vtype;
35719bcc07fSBarry Smith   int         ierr;
3582593348eSBarry Smith 
35919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
361e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3622593348eSBarry Smith   }
36319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
364b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3652593348eSBarry Smith   }
36619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
367b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3682593348eSBarry Smith   }
36919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3703270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3712593348eSBarry Smith   }
3722593348eSBarry Smith   return 0;
3732593348eSBarry Smith }
374b6490206SBarry Smith 
375119a934aSSatish Balay #define CHUNKSIZE  10
376cd0e1443SSatish Balay 
3775615d1e5SSatish Balay #undef __FUNC__
3785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
379639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
380cd0e1443SSatish Balay {
381cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
382cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
383cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
384a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3859df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
386cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
387cd0e1443SSatish Balay 
388cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
389cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3903b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
391e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
392e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3933b2fbd54SBarry Smith #endif
3942c3acbe9SBarry Smith     rp   = aj + ai[brow];
3952c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3962c3acbe9SBarry Smith     rmax = imax[brow];
3972c3acbe9SBarry Smith     nrow = ailen[brow];
398cd0e1443SSatish Balay     low  = 0;
399cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
4003b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
401e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
402e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
4033b2fbd54SBarry Smith #endif
404a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
405cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
406cd0e1443SSatish Balay       if (roworiented) {
407cd0e1443SSatish Balay         value = *v++;
408cd0e1443SSatish Balay       }
409cd0e1443SSatish Balay       else {
410cd0e1443SSatish Balay         value = v[k + l*m];
411cd0e1443SSatish Balay       }
412cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4132c3acbe9SBarry Smith       while (high-low > 7) {
414cd0e1443SSatish Balay         t = (low+high)/2;
415cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
416cd0e1443SSatish Balay         else              low  = t;
417cd0e1443SSatish Balay       }
418cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
419cd0e1443SSatish Balay         if (rp[i] > bcol) break;
420cd0e1443SSatish Balay         if (rp[i] == bcol) {
4217e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
422cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
423cd0e1443SSatish Balay           else                  *bap  = value;
424f1241b54SBarry Smith           goto noinsert1;
425cd0e1443SSatish Balay         }
426cd0e1443SSatish Balay       }
427c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
42811ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
429cd0e1443SSatish Balay       if (nrow >= rmax) {
430cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
431cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
432cd0e1443SSatish Balay         Scalar *new_a;
433cd0e1443SSatish Balay 
43411ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
43596854ed6SLois Curfman McInnes 
43696854ed6SLois Curfman McInnes         /* Malloc new storage space */
4377e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
438cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4397e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
440cd0e1443SSatish Balay         new_i   = new_j + new_nz;
441cd0e1443SSatish Balay 
442cd0e1443SSatish Balay         /* copy over old data into new slots */
443cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4447e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
445a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
446a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
447a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
448cd0e1443SSatish Balay                                                            len*sizeof(int));
449a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
450a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
451a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
452a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
453cd0e1443SSatish Balay         /* free up old matrix storage */
454cd0e1443SSatish Balay         PetscFree(a->a);
455cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
456cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
457cd0e1443SSatish Balay         a->singlemalloc = 1;
458cd0e1443SSatish Balay 
459a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
460cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4617e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
46218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
463cd0e1443SSatish Balay         a->reallocs++;
464119a934aSSatish Balay         a->nz++;
465cd0e1443SSatish Balay       }
4667e67e3f9SSatish Balay       N = nrow++ - 1;
467cd0e1443SSatish Balay       /* shift up all the later entries in this row */
468cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
469cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4707e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
471cd0e1443SSatish Balay       }
4727e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
473cd0e1443SSatish Balay       rp[i]                      = bcol;
4747e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
475f1241b54SBarry Smith       noinsert1:;
476cd0e1443SSatish Balay       low = i;
477cd0e1443SSatish Balay     }
478cd0e1443SSatish Balay     ailen[brow] = nrow;
479cd0e1443SSatish Balay   }
480cd0e1443SSatish Balay   return 0;
481cd0e1443SSatish Balay }
482cd0e1443SSatish Balay 
4835615d1e5SSatish Balay #undef __FUNC__
48405a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
48592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
48692c4ed94SBarry Smith {
48792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4888a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
489dd9472c6SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
490dd9472c6SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
4910e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
49292c4ed94SBarry Smith 
4930e324ae4SSatish Balay   if (roworiented) {
4940e324ae4SSatish Balay     stepval = (n-1)*bs;
4950e324ae4SSatish Balay   } else {
4960e324ae4SSatish Balay     stepval = (m-1)*bs;
4970e324ae4SSatish Balay   }
49892c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
49992c4ed94SBarry Smith     row  = im[k];
50092c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50192c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
5028a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
50392c4ed94SBarry Smith #endif
50492c4ed94SBarry Smith     rp   = aj + ai[row];
50592c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
50692c4ed94SBarry Smith     rmax = imax[row];
50792c4ed94SBarry Smith     nrow = ailen[row];
50892c4ed94SBarry Smith     low  = 0;
50992c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
51092c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
51192c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5128a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
51392c4ed94SBarry Smith #endif
51492c4ed94SBarry Smith       col = in[l];
51592c4ed94SBarry Smith       if (roworiented) {
5160e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5170e324ae4SSatish Balay       } else {
5180e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
51992c4ed94SBarry Smith       }
52092c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
52192c4ed94SBarry Smith       while (high-low > 7) {
52292c4ed94SBarry Smith         t = (low+high)/2;
52392c4ed94SBarry Smith         if (rp[t] > col) high = t;
52492c4ed94SBarry Smith         else             low  = t;
52592c4ed94SBarry Smith       }
52692c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
52792c4ed94SBarry Smith         if (rp[i] > col) break;
52892c4ed94SBarry Smith         if (rp[i] == col) {
5298a84c255SSatish Balay           bap  = ap +  bs2*i;
5300e324ae4SSatish Balay           if (roworiented) {
5318a84c255SSatish Balay             if (is == ADD_VALUES) {
532dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
533dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5348a84c255SSatish Balay                   bap[jj] += *value++;
535dd9472c6SBarry Smith                 }
536dd9472c6SBarry Smith               }
5370e324ae4SSatish Balay             } else {
538dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
539dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5400e324ae4SSatish Balay                   bap[jj] = *value++;
5418a84c255SSatish Balay                 }
542dd9472c6SBarry Smith               }
543dd9472c6SBarry Smith             }
5440e324ae4SSatish Balay           } else {
5450e324ae4SSatish Balay             if (is == ADD_VALUES) {
546dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
547dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5480e324ae4SSatish Balay                   *bap++ += *value++;
549dd9472c6SBarry Smith                 }
550dd9472c6SBarry Smith               }
5510e324ae4SSatish Balay             } else {
552dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
553dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5540e324ae4SSatish Balay                   *bap++  = *value++;
5550e324ae4SSatish Balay                 }
5568a84c255SSatish Balay               }
557dd9472c6SBarry Smith             }
558dd9472c6SBarry Smith           }
559f1241b54SBarry Smith           goto noinsert2;
56092c4ed94SBarry Smith         }
56192c4ed94SBarry Smith       }
56289280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
56311ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
56492c4ed94SBarry Smith       if (nrow >= rmax) {
56592c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
56692c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
56792c4ed94SBarry Smith         Scalar *new_a;
56892c4ed94SBarry Smith 
56911ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
57089280ab3SLois Curfman McInnes 
57192c4ed94SBarry Smith         /* malloc new storage space */
57292c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
57392c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
57492c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
57592c4ed94SBarry Smith         new_i   = new_j + new_nz;
57692c4ed94SBarry Smith 
57792c4ed94SBarry Smith         /* copy over old data into new slots */
57892c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
57992c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
58092c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
58192c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
582dd9472c6SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
58392c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
58492c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
585dd9472c6SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
58692c4ed94SBarry Smith         /* free up old matrix storage */
58792c4ed94SBarry Smith         PetscFree(a->a);
58892c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
58992c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
59092c4ed94SBarry Smith         a->singlemalloc = 1;
59192c4ed94SBarry Smith 
59292c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
59392c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
59492c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
59592c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
59692c4ed94SBarry Smith         a->reallocs++;
59792c4ed94SBarry Smith         a->nz++;
59892c4ed94SBarry Smith       }
59992c4ed94SBarry Smith       N = nrow++ - 1;
60092c4ed94SBarry Smith       /* shift up all the later entries in this row */
60192c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
60292c4ed94SBarry Smith         rp[ii+1] = rp[ii];
60392c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
60492c4ed94SBarry Smith       }
60592c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
60692c4ed94SBarry Smith       rp[i] = col;
6078a84c255SSatish Balay       bap   = ap +  bs2*i;
6080e324ae4SSatish Balay       if (roworiented) {
609dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval) {
610dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs ) {
6110e324ae4SSatish Balay             bap[jj] = *value++;
612dd9472c6SBarry Smith           }
613dd9472c6SBarry Smith         }
6140e324ae4SSatish Balay       } else {
615dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval ) {
616dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++ ) {
6170e324ae4SSatish Balay             *bap++  = *value++;
6180e324ae4SSatish Balay           }
619dd9472c6SBarry Smith         }
620dd9472c6SBarry Smith       }
621f1241b54SBarry Smith       noinsert2:;
62292c4ed94SBarry Smith       low = i;
62392c4ed94SBarry Smith     }
62492c4ed94SBarry Smith     ailen[row] = nrow;
62592c4ed94SBarry Smith   }
62692c4ed94SBarry Smith   return 0;
62792c4ed94SBarry Smith }
62892c4ed94SBarry Smith 
62992c4ed94SBarry Smith #undef __FUNC__
630d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ"
6318f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6320b824a48SSatish Balay {
6330b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6340b824a48SSatish Balay   *m = a->m; *n = a->n;
6350b824a48SSatish Balay   return 0;
6360b824a48SSatish Balay }
6370b824a48SSatish Balay 
6385615d1e5SSatish Balay #undef __FUNC__
639d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
6408f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6410b824a48SSatish Balay {
6420b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6430b824a48SSatish Balay   *m = 0; *n = a->m;
6440b824a48SSatish Balay   return 0;
6450b824a48SSatish Balay }
6460b824a48SSatish Balay 
6475615d1e5SSatish Balay #undef __FUNC__
6485615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6499867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6509867e207SSatish Balay {
6519867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6527e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6539867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6549867e207SSatish Balay 
6559867e207SSatish Balay   bs  = a->bs;
6569867e207SSatish Balay   ai  = a->i;
6579867e207SSatish Balay   aj  = a->j;
6589867e207SSatish Balay   aa  = a->a;
6599df24120SSatish Balay   bs2 = a->bs2;
6609867e207SSatish Balay 
661e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6629867e207SSatish Balay 
6639867e207SSatish Balay   bn  = row/bs;   /* Block number */
6649867e207SSatish Balay   bp  = row % bs; /* Block Position */
6659867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6669867e207SSatish Balay   *nz = bs*M;
6679867e207SSatish Balay 
6689867e207SSatish Balay   if (v) {
6699867e207SSatish Balay     *v = 0;
6709867e207SSatish Balay     if (*nz) {
6719867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6729867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6739867e207SSatish Balay         v_i  = *v + i*bs;
6747e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6757e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6769867e207SSatish Balay       }
6779867e207SSatish Balay     }
6789867e207SSatish Balay   }
6799867e207SSatish Balay 
6809867e207SSatish Balay   if (idx) {
6819867e207SSatish Balay     *idx = 0;
6829867e207SSatish Balay     if (*nz) {
6839867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6849867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6859867e207SSatish Balay         idx_i = *idx + i*bs;
6869867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6879867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6889867e207SSatish Balay       }
6899867e207SSatish Balay     }
6909867e207SSatish Balay   }
6919867e207SSatish Balay   return 0;
6929867e207SSatish Balay }
6939867e207SSatish Balay 
6945615d1e5SSatish Balay #undef __FUNC__
695d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6969867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6979867e207SSatish Balay {
6989867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6999867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
7009867e207SSatish Balay   return 0;
7019867e207SSatish Balay }
702b6490206SBarry Smith 
7035615d1e5SSatish Balay #undef __FUNC__
7045615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
7058f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
706f2501298SSatish Balay {
707f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
708f2501298SSatish Balay   Mat         C;
709f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
7109df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
711f2501298SSatish Balay   Scalar      *array=a->a;
712f2501298SSatish Balay 
713f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
714e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
715f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
716f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
717a7c10996SSatish Balay 
718a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
719f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
720f2501298SSatish Balay   PetscFree(col);
721f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
722f2501298SSatish Balay   cols = rows + bs;
723f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
724f2501298SSatish Balay     cols[0] = i*bs;
725f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
726f2501298SSatish Balay     len = ai[i+1] - ai[i];
727f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
728f2501298SSatish Balay       rows[0] = (*aj++)*bs;
729f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
730f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
731f2501298SSatish Balay       array += bs2;
732f2501298SSatish Balay     }
733f2501298SSatish Balay   }
7341073c447SSatish Balay   PetscFree(rows);
735f2501298SSatish Balay 
7366d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7376d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
738f2501298SSatish Balay 
739f2501298SSatish Balay   if (B != PETSC_NULL) {
740f2501298SSatish Balay     *B = C;
741f2501298SSatish Balay   } else {
742f2501298SSatish Balay     /* This isn't really an in-place transpose */
743f2501298SSatish Balay     PetscFree(a->a);
744f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
745f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
746f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
747f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
748f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
749f2501298SSatish Balay     PetscFree(a);
750f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
751f2501298SSatish Balay     PetscHeaderDestroy(C);
752f2501298SSatish Balay   }
753f2501298SSatish Balay   return 0;
754f2501298SSatish Balay }
755f2501298SSatish Balay 
756f2501298SSatish Balay 
7575615d1e5SSatish Balay #undef __FUNC__
7585615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7598f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
760584200bdSSatish Balay {
761584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
762584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
763a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
76443ee02c3SBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
765584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
766584200bdSSatish Balay 
7676d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
768584200bdSSatish Balay 
76943ee02c3SBarry Smith   if (m) rmax = ailen[0];
770584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
771584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
772584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
773d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
774584200bdSSatish Balay     if (fshift) {
775a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
776584200bdSSatish Balay       N = ailen[i];
777584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
778584200bdSSatish Balay         ip[j-fshift] = ip[j];
7797e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
780584200bdSSatish Balay       }
781584200bdSSatish Balay     }
782584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
783584200bdSSatish Balay   }
784584200bdSSatish Balay   if (mbs) {
785584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
786584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
787584200bdSSatish Balay   }
788584200bdSSatish Balay   /* reset ilen and imax for each row */
789584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
790584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
791584200bdSSatish Balay   }
792a7c10996SSatish Balay   a->nz = ai[mbs];
793584200bdSSatish Balay 
794584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
795584200bdSSatish Balay   if (fshift && a->diag) {
796584200bdSSatish Balay     PetscFree(a->diag);
797584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
798584200bdSSatish Balay     a->diag = 0;
799584200bdSSatish Balay   }
8003d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
801219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
8023d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
803584200bdSSatish Balay            a->reallocs);
804d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
805e2f3b5e9SSatish Balay   a->reallocs          = 0;
8064e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
8074e220ebcSLois Curfman McInnes 
808584200bdSSatish Balay   return 0;
809584200bdSSatish Balay }
810584200bdSSatish Balay 
8115615d1e5SSatish Balay #undef __FUNC__
8125615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
8138f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
8142593348eSBarry Smith {
815b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8169df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8172593348eSBarry Smith   return 0;
8182593348eSBarry Smith }
8192593348eSBarry Smith 
8205615d1e5SSatish Balay #undef __FUNC__
821d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ"
822b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8232593348eSBarry Smith {
8242593348eSBarry Smith   Mat         A  = (Mat) obj;
825b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8262593348eSBarry Smith 
8272593348eSBarry Smith #if defined(PETSC_LOG)
8282593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8292593348eSBarry Smith #endif
8302593348eSBarry Smith   PetscFree(a->a);
8312593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8322593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8332593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8342593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8352593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
836de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8372593348eSBarry Smith   PetscFree(a);
838f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
839f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8402593348eSBarry Smith   return 0;
8412593348eSBarry Smith }
8422593348eSBarry Smith 
8435615d1e5SSatish Balay #undef __FUNC__
844d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ"
8458f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8462593348eSBarry Smith {
847b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8486d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8496d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8506d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
851219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8526d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
853c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
85496854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8556d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8566d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
857219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8586d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8596d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
86090f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8612b362799SSatish Balay            op == MAT_IGNORE_OFF_PROC_ENTRIES)
86294a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8636d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
864e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8652593348eSBarry Smith   else
866e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8672593348eSBarry Smith   return 0;
8682593348eSBarry Smith }
8692593348eSBarry Smith 
8702593348eSBarry Smith 
8712593348eSBarry Smith /* -------------------------------------------------------*/
8722593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8732593348eSBarry Smith /* -------------------------------------------------------*/
874b6490206SBarry Smith #include "pinclude/plapack.h"
875b6490206SBarry Smith 
8765615d1e5SSatish Balay #undef __FUNC__
8775615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
87839b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8792593348eSBarry Smith {
880b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
88139b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
882c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8832593348eSBarry Smith 
884c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
885c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
886b6490206SBarry Smith 
887119a934aSSatish Balay   idx   = a->j;
888119a934aSSatish Balay   v     = a->a;
889119a934aSSatish Balay   ii    = a->i;
890119a934aSSatish Balay 
891119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
892119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
893119a934aSSatish Balay     sum  = 0.0;
894119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8951a6a6d98SBarry Smith     z[i] = sum;
896119a934aSSatish Balay   }
897c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
898c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
89939b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
90039b95ed1SBarry Smith   return 0;
90139b95ed1SBarry Smith }
90239b95ed1SBarry Smith 
9035615d1e5SSatish Balay #undef __FUNC__
9045615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
90539b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
90639b95ed1SBarry Smith {
90739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
90839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
90939b95ed1SBarry Smith   register Scalar x1,x2;
91039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
911c16cb8f2SBarry Smith   int             j,n;
91239b95ed1SBarry Smith 
913c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
914c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
91539b95ed1SBarry Smith 
91639b95ed1SBarry Smith   idx   = a->j;
91739b95ed1SBarry Smith   v     = a->a;
91839b95ed1SBarry Smith   ii    = a->i;
91939b95ed1SBarry Smith 
920119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
921119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
922119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
923119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
924119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
925119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
926119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
927119a934aSSatish Balay       v += 4;
928119a934aSSatish Balay     }
9291a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
930119a934aSSatish Balay     z += 2;
931119a934aSSatish Balay   }
932c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
933c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
93439b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
93539b95ed1SBarry Smith   return 0;
93639b95ed1SBarry Smith }
93739b95ed1SBarry Smith 
9385615d1e5SSatish Balay #undef __FUNC__
9395615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
94039b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
94139b95ed1SBarry Smith {
94239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
94339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
944c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
94539b95ed1SBarry Smith 
946c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
947c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
94839b95ed1SBarry Smith 
94939b95ed1SBarry Smith   idx   = a->j;
95039b95ed1SBarry Smith   v     = a->a;
95139b95ed1SBarry Smith   ii    = a->i;
95239b95ed1SBarry Smith 
953119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
954119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
955119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
956119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
957119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
958119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
959119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
960119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
961119a934aSSatish Balay       v += 9;
962119a934aSSatish Balay     }
9631a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
964119a934aSSatish Balay     z += 3;
965119a934aSSatish Balay   }
966c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
967c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
96839b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
96939b95ed1SBarry Smith   return 0;
97039b95ed1SBarry Smith }
97139b95ed1SBarry Smith 
9725615d1e5SSatish Balay #undef __FUNC__
9735615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
97439b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
97539b95ed1SBarry Smith {
97639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
97739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
97839b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
97939b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
980c16cb8f2SBarry Smith   int             j,n;
98139b95ed1SBarry Smith 
982c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
983c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
98439b95ed1SBarry Smith 
98539b95ed1SBarry Smith   idx   = a->j;
98639b95ed1SBarry Smith   v     = a->a;
98739b95ed1SBarry Smith   ii    = a->i;
98839b95ed1SBarry Smith 
989119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
990119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
991119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
992119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
993119a934aSSatish Balay       xb = x + 4*(*idx++);
994119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
995119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
996119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
997119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
998119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
999119a934aSSatish Balay       v += 16;
1000119a934aSSatish Balay     }
10011a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1002119a934aSSatish Balay     z += 4;
1003119a934aSSatish Balay   }
1004c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1005c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
100639b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
100739b95ed1SBarry Smith   return 0;
100839b95ed1SBarry Smith }
100939b95ed1SBarry Smith 
10105615d1e5SSatish Balay #undef __FUNC__
10115615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
101239b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
101339b95ed1SBarry Smith {
101439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
101539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
101639b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
1017c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
101839b95ed1SBarry Smith 
1019c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1020c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
102139b95ed1SBarry Smith 
102239b95ed1SBarry Smith   idx   = a->j;
102339b95ed1SBarry Smith   v     = a->a;
102439b95ed1SBarry Smith   ii    = a->i;
102539b95ed1SBarry Smith 
1026119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1027119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1028119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1029119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1030119a934aSSatish Balay       xb = x + 5*(*idx++);
1031119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1032119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1033119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1034119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1035119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1036119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1037119a934aSSatish Balay       v += 25;
1038119a934aSSatish Balay     }
10391a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1040119a934aSSatish Balay     z += 5;
1041119a934aSSatish Balay   }
1042c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1043c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
104439b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
104539b95ed1SBarry Smith   return 0;
104639b95ed1SBarry Smith }
104739b95ed1SBarry Smith 
10485615d1e5SSatish Balay #undef __FUNC__
104948e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
105048e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
105148e9ddb2SSatish Balay {
105248e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
105348e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
105448e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
105548e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
105648e9ddb2SSatish Balay 
105748e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
105848e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
105948e9ddb2SSatish Balay 
106048e9ddb2SSatish Balay   idx   = a->j;
106148e9ddb2SSatish Balay   v     = a->a;
106248e9ddb2SSatish Balay   ii    = a->i;
106348e9ddb2SSatish Balay 
106448e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
106548e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
106648e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
106748e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
106848e9ddb2SSatish Balay       xb = x + 7*(*idx++);
106948e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
107048e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
107148e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
107248e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
107348e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
107448e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
107548e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
107648e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
107748e9ddb2SSatish Balay       v += 49;
107848e9ddb2SSatish Balay     }
107948e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
108048e9ddb2SSatish Balay     z += 7;
108148e9ddb2SSatish Balay   }
108248e9ddb2SSatish Balay 
108348e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
108448e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
108548e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
108648e9ddb2SSatish Balay   return 0;
108748e9ddb2SSatish Balay }
108848e9ddb2SSatish Balay 
108948e9ddb2SSatish Balay #undef __FUNC__
10905615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
109139b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
109239b95ed1SBarry Smith {
109339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
109439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1095c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1096c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1097c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
109839b95ed1SBarry Smith 
1099c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1100c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
110139b95ed1SBarry Smith 
110239b95ed1SBarry Smith   idx   = a->j;
110339b95ed1SBarry Smith   v     = a->a;
110439b95ed1SBarry Smith   ii    = a->i;
110539b95ed1SBarry Smith 
110639b95ed1SBarry Smith 
1107119a934aSSatish Balay   if (!a->mult_work) {
11083b547af2SSatish Balay     k = PetscMax(a->m,a->n);
11092b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1110119a934aSSatish Balay   }
1111119a934aSSatish Balay   work = a->mult_work;
1112119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1113119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1114119a934aSSatish Balay     ncols = n*bs;
1115119a934aSSatish Balay     workt = work;
1116119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1117119a934aSSatish Balay       xb = x + bs*(*idx++);
1118119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1119119a934aSSatish Balay       workt += bs;
1120119a934aSSatish Balay     }
11211a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1122119a934aSSatish Balay     v += n*bs2;
1123119a934aSSatish Balay     z += bs;
1124119a934aSSatish Balay   }
1125c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1126c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11271a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1128bb42667fSSatish Balay   return 0;
1129bb42667fSSatish Balay }
1130bb42667fSSatish Balay 
11315615d1e5SSatish Balay #undef __FUNC__
11325615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1133f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1134f44d3a6dSSatish Balay {
1135f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1136f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1137c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1138f44d3a6dSSatish Balay 
1139c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1140c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1141c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1142f44d3a6dSSatish Balay 
1143f44d3a6dSSatish Balay   idx   = a->j;
1144f44d3a6dSSatish Balay   v     = a->a;
1145f44d3a6dSSatish Balay   ii    = a->i;
1146f44d3a6dSSatish Balay 
1147f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1148f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1149f44d3a6dSSatish Balay     sum  = y[i];
1150f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1151f44d3a6dSSatish Balay     z[i] = sum;
1152f44d3a6dSSatish Balay   }
1153c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1154c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1155c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1156f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1157f44d3a6dSSatish Balay   return 0;
1158f44d3a6dSSatish Balay }
1159f44d3a6dSSatish Balay 
11605615d1e5SSatish Balay #undef __FUNC__
11615615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1162f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1163f44d3a6dSSatish Balay {
1164f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1165f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1166f44d3a6dSSatish Balay   register Scalar x1,x2;
1167f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1168c16cb8f2SBarry Smith   int             j,n;
1169f44d3a6dSSatish Balay 
1170c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1171c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1172c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1173f44d3a6dSSatish Balay 
1174f44d3a6dSSatish Balay   idx   = a->j;
1175f44d3a6dSSatish Balay   v     = a->a;
1176f44d3a6dSSatish Balay   ii    = a->i;
1177f44d3a6dSSatish Balay 
1178f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1179f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1180f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1181f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1182f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1183f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1184f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1185f44d3a6dSSatish Balay       v += 4;
1186f44d3a6dSSatish Balay     }
1187f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1188f44d3a6dSSatish Balay     z += 2; y += 2;
1189f44d3a6dSSatish Balay   }
1190c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1191c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1192c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1193f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1194f44d3a6dSSatish Balay   return 0;
1195f44d3a6dSSatish Balay }
1196f44d3a6dSSatish Balay 
11975615d1e5SSatish Balay #undef __FUNC__
11985615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1199f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1200f44d3a6dSSatish Balay {
1201f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1202f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1203c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1204f44d3a6dSSatish Balay 
1205c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1206c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1207c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1208f44d3a6dSSatish Balay 
1209f44d3a6dSSatish Balay   idx   = a->j;
1210f44d3a6dSSatish Balay   v     = a->a;
1211f44d3a6dSSatish Balay   ii    = a->i;
1212f44d3a6dSSatish Balay 
1213f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1214f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1215f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1216f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1217f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1218f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1219f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1220f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1221f44d3a6dSSatish Balay       v += 9;
1222f44d3a6dSSatish Balay     }
1223f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1224f44d3a6dSSatish Balay     z += 3; y += 3;
1225f44d3a6dSSatish Balay   }
1226c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1227c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1228c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1229f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1230f44d3a6dSSatish Balay   return 0;
1231f44d3a6dSSatish Balay }
1232f44d3a6dSSatish Balay 
12335615d1e5SSatish Balay #undef __FUNC__
12345615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1235f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1236f44d3a6dSSatish Balay {
1237f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1238f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1239f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1240f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1241c16cb8f2SBarry Smith   int             j,n;
1242f44d3a6dSSatish Balay 
1243c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1244c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1245c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1246f44d3a6dSSatish Balay 
1247f44d3a6dSSatish Balay   idx   = a->j;
1248f44d3a6dSSatish Balay   v     = a->a;
1249f44d3a6dSSatish Balay   ii    = a->i;
1250f44d3a6dSSatish Balay 
1251f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1252f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1253f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1254f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1255f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1256f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1257f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1258f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1259f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1260f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1261f44d3a6dSSatish Balay       v += 16;
1262f44d3a6dSSatish Balay     }
1263f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1264f44d3a6dSSatish Balay     z += 4; y += 4;
1265f44d3a6dSSatish Balay   }
1266c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1267c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1268c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1269f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1270f44d3a6dSSatish Balay   return 0;
1271f44d3a6dSSatish Balay }
1272f44d3a6dSSatish Balay 
12735615d1e5SSatish Balay #undef __FUNC__
12745615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1275f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1276f44d3a6dSSatish Balay {
1277f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1278f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1279f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1280c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1281f44d3a6dSSatish Balay 
1282c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1283c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1284c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1285f44d3a6dSSatish Balay 
1286f44d3a6dSSatish Balay   idx   = a->j;
1287f44d3a6dSSatish Balay   v     = a->a;
1288f44d3a6dSSatish Balay   ii    = a->i;
1289f44d3a6dSSatish Balay 
1290f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1291f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1292f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1293f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1294f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1295f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1296f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1297f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1298f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1299f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1300f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1301f44d3a6dSSatish Balay       v += 25;
1302f44d3a6dSSatish Balay     }
1303f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1304f44d3a6dSSatish Balay     z += 5; y += 5;
1305f44d3a6dSSatish Balay   }
1306c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1307c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1308c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1309f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1310f44d3a6dSSatish Balay   return 0;
1311f44d3a6dSSatish Balay }
1312f44d3a6dSSatish Balay 
13135615d1e5SSatish Balay #undef __FUNC__
131448e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
131548e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
131648e9ddb2SSatish Balay {
131748e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
131848e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
131948e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
132048e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
132148e9ddb2SSatish Balay 
132248e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
132348e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
132448e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
132548e9ddb2SSatish Balay 
132648e9ddb2SSatish Balay   idx   = a->j;
132748e9ddb2SSatish Balay   v     = a->a;
132848e9ddb2SSatish Balay   ii    = a->i;
132948e9ddb2SSatish Balay 
133048e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
133148e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
133248e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
133348e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
133448e9ddb2SSatish Balay       xb = x + 7*(*idx++);
133548e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
133648e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
133748e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
133848e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
133948e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
134048e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
134148e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
134248e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
134348e9ddb2SSatish Balay       v += 49;
134448e9ddb2SSatish Balay     }
134548e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
134648e9ddb2SSatish Balay     z += 7; y += 7;
134748e9ddb2SSatish Balay   }
134848e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
134948e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
135048e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
135148e9ddb2SSatish Balay   PLogFlops(98*a->nz);
135248e9ddb2SSatish Balay   return 0;
135348e9ddb2SSatish Balay }
135448e9ddb2SSatish Balay 
135548e9ddb2SSatish Balay 
135648e9ddb2SSatish Balay #undef __FUNC__
13575615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1358f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1359f44d3a6dSSatish Balay {
1360f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1361f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1362f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1363f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1364f44d3a6dSSatish Balay 
1365f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1366f44d3a6dSSatish Balay 
1367c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1368c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1369f44d3a6dSSatish Balay 
1370f44d3a6dSSatish Balay   idx   = a->j;
1371f44d3a6dSSatish Balay   v     = a->a;
1372f44d3a6dSSatish Balay   ii    = a->i;
1373f44d3a6dSSatish Balay 
1374f44d3a6dSSatish Balay 
1375f44d3a6dSSatish Balay   if (!a->mult_work) {
1376f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1377f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1378f44d3a6dSSatish Balay   }
1379f44d3a6dSSatish Balay   work = a->mult_work;
1380f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1381f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1382f44d3a6dSSatish Balay     ncols = n*bs;
1383f44d3a6dSSatish Balay     workt = work;
1384f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1385f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1386f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1387f44d3a6dSSatish Balay       workt += bs;
1388f44d3a6dSSatish Balay     }
1389f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1390f44d3a6dSSatish Balay     v += n*bs2;
1391f44d3a6dSSatish Balay     z += bs;
1392f44d3a6dSSatish Balay   }
1393c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1394c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1395f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1396f44d3a6dSSatish Balay   return 0;
1397f44d3a6dSSatish Balay }
1398f44d3a6dSSatish Balay 
13995615d1e5SSatish Balay #undef __FUNC__
14005615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
14018f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1402bb42667fSSatish Balay {
1403bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
14041a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1405bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1406bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
14079df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1408119a934aSSatish Balay 
1409119a934aSSatish Balay 
141090f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
141190f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1412bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1413bb42667fSSatish Balay 
1414119a934aSSatish Balay   idx   = a->j;
1415119a934aSSatish Balay   v     = a->a;
1416119a934aSSatish Balay   ii    = a->i;
1417119a934aSSatish Balay 
1418119a934aSSatish Balay   switch (bs) {
1419119a934aSSatish Balay   case 1:
1420119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1421119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1422119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1423119a934aSSatish Balay       ib = idx + ai[i];
1424119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1425bb1453f0SSatish Balay         rval    = ib[j];
1426bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1427119a934aSSatish Balay       }
1428119a934aSSatish Balay     }
1429119a934aSSatish Balay     break;
1430119a934aSSatish Balay   case 2:
1431119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1432119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1433119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1434119a934aSSatish Balay       ib = idx + ai[i];
1435119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1436119a934aSSatish Balay         rval      = ib[j]*2;
1437bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1438bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1439119a934aSSatish Balay         v += 4;
1440119a934aSSatish Balay       }
1441119a934aSSatish Balay     }
1442119a934aSSatish Balay     break;
1443119a934aSSatish Balay   case 3:
1444119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1445119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1446119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1447119a934aSSatish Balay       ib = idx + ai[i];
1448119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1449119a934aSSatish Balay         rval      = ib[j]*3;
1450bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1451bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1452bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1453119a934aSSatish Balay         v += 9;
1454119a934aSSatish Balay       }
1455119a934aSSatish Balay     }
1456119a934aSSatish Balay     break;
1457119a934aSSatish Balay   case 4:
1458119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1459119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1460119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1461119a934aSSatish Balay       ib = idx + ai[i];
1462119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1463119a934aSSatish Balay         rval      = ib[j]*4;
1464bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1465bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1466bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1467bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1468119a934aSSatish Balay         v += 16;
1469119a934aSSatish Balay       }
1470119a934aSSatish Balay     }
1471119a934aSSatish Balay     break;
1472119a934aSSatish Balay   case 5:
1473119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1474119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1475119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1476119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1477119a934aSSatish Balay       ib = idx + ai[i];
1478119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1479119a934aSSatish Balay         rval      = ib[j]*5;
1480bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1481bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1482bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1483bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1484bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1485119a934aSSatish Balay         v += 25;
1486119a934aSSatish Balay       }
1487119a934aSSatish Balay     }
1488119a934aSSatish Balay     break;
1489119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1490119a934aSSatish Balay     default: {
1491119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1492119a934aSSatish Balay       if (!a->mult_work) {
14933b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1494bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1495119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1496119a934aSSatish Balay       }
1497119a934aSSatish Balay       work = a->mult_work;
1498119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1499119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1500119a934aSSatish Balay         ncols = n*bs;
1501119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1502119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1503119a934aSSatish Balay         v += n*bs2;
1504119a934aSSatish Balay         x += bs;
1505119a934aSSatish Balay         workt = work;
1506119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1507119a934aSSatish Balay           zb = z + bs*(*idx++);
1508bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1509119a934aSSatish Balay           workt += bs;
1510119a934aSSatish Balay         }
1511119a934aSSatish Balay       }
1512119a934aSSatish Balay     }
1513119a934aSSatish Balay   }
15140419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15150419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1516faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1517faf6580fSSatish Balay   return 0;
1518faf6580fSSatish Balay }
15191c351548SSatish Balay 
15205615d1e5SSatish Balay #undef __FUNC__
15215615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15228f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1523faf6580fSSatish Balay {
1524faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1525faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1526faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1527faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1528faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1529faf6580fSSatish Balay 
1530faf6580fSSatish Balay 
1531faf6580fSSatish Balay 
153290f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
153390f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1534faf6580fSSatish Balay 
1535648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1536648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1537648c1d95SSatish Balay 
1538faf6580fSSatish Balay 
1539faf6580fSSatish Balay   idx   = a->j;
1540faf6580fSSatish Balay   v     = a->a;
1541faf6580fSSatish Balay   ii    = a->i;
1542faf6580fSSatish Balay 
1543faf6580fSSatish Balay   switch (bs) {
1544faf6580fSSatish Balay   case 1:
1545faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1546faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1547faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1548faf6580fSSatish Balay       ib = idx + ai[i];
1549faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1550faf6580fSSatish Balay         rval    = ib[j];
1551faf6580fSSatish Balay         z[rval] += *v++ * x1;
1552faf6580fSSatish Balay       }
1553faf6580fSSatish Balay     }
1554faf6580fSSatish Balay     break;
1555faf6580fSSatish Balay   case 2:
1556faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1557faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1558faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1559faf6580fSSatish Balay       ib = idx + ai[i];
1560faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1561faf6580fSSatish Balay         rval      = ib[j]*2;
1562faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1563faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1564faf6580fSSatish Balay         v += 4;
1565faf6580fSSatish Balay       }
1566faf6580fSSatish Balay     }
1567faf6580fSSatish Balay     break;
1568faf6580fSSatish Balay   case 3:
1569faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1570faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1571faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1572faf6580fSSatish Balay       ib = idx + ai[i];
1573faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1574faf6580fSSatish Balay         rval      = ib[j]*3;
1575faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1576faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1577faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1578faf6580fSSatish Balay         v += 9;
1579faf6580fSSatish Balay       }
1580faf6580fSSatish Balay     }
1581faf6580fSSatish Balay     break;
1582faf6580fSSatish Balay   case 4:
1583faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1584faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1585faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1586faf6580fSSatish Balay       ib = idx + ai[i];
1587faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1588faf6580fSSatish Balay         rval      = ib[j]*4;
1589faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1590faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1591faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1592faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1593faf6580fSSatish Balay         v += 16;
1594faf6580fSSatish Balay       }
1595faf6580fSSatish Balay     }
1596faf6580fSSatish Balay     break;
1597faf6580fSSatish Balay   case 5:
1598faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1599faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1600faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1601faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1602faf6580fSSatish Balay       ib = idx + ai[i];
1603faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1604faf6580fSSatish Balay         rval      = ib[j]*5;
1605faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1606faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1607faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1608faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1609faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1610faf6580fSSatish Balay         v += 25;
1611faf6580fSSatish Balay       }
1612faf6580fSSatish Balay     }
1613faf6580fSSatish Balay     break;
1614faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1615faf6580fSSatish Balay     default: {
1616faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1617faf6580fSSatish Balay       if (!a->mult_work) {
1618faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1619faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1620faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1621faf6580fSSatish Balay       }
1622faf6580fSSatish Balay       work = a->mult_work;
1623faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1624faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1625faf6580fSSatish Balay         ncols = n*bs;
1626faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1627faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1628faf6580fSSatish Balay         v += n*bs2;
1629faf6580fSSatish Balay         x += bs;
1630faf6580fSSatish Balay         workt = work;
1631faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1632faf6580fSSatish Balay           zb = z + bs*(*idx++);
1633faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1634faf6580fSSatish Balay           workt += bs;
1635faf6580fSSatish Balay         }
1636faf6580fSSatish Balay       }
1637faf6580fSSatish Balay     }
1638faf6580fSSatish Balay   }
1639faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1640faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1641faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16422593348eSBarry Smith   return 0;
16432593348eSBarry Smith }
16442593348eSBarry Smith 
16455615d1e5SSatish Balay #undef __FUNC__
1646d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ"
16478f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16482593348eSBarry Smith {
1649b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16504e220ebcSLois Curfman McInnes 
16514e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16524e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16534e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16544e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16554e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16564e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16574e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16584e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16594e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16604e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16614e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16624e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16634e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16644e220ebcSLois Curfman McInnes   if (A->factor) {
16654e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16664e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16674e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16684e220ebcSLois Curfman McInnes   } else {
16694e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16704e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16714e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16724e220ebcSLois Curfman McInnes   }
16732593348eSBarry Smith   return 0;
16742593348eSBarry Smith }
16752593348eSBarry Smith 
16765615d1e5SSatish Balay #undef __FUNC__
16775615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
16788f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
167991d316f6SSatish Balay {
168091d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
168191d316f6SSatish Balay 
1682e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
168391d316f6SSatish Balay 
168491d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
168591d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1686a7c10996SSatish Balay       (a->nz != b->nz)) {
168791d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168891d316f6SSatish Balay   }
168991d316f6SSatish Balay 
169091d316f6SSatish Balay   /* if the a->i are the same */
169191d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
169291d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169391d316f6SSatish Balay   }
169491d316f6SSatish Balay 
169591d316f6SSatish Balay   /* if a->j are the same */
169691d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
169791d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169891d316f6SSatish Balay   }
169991d316f6SSatish Balay 
170091d316f6SSatish Balay   /* if a->a are the same */
170191d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
170291d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
170391d316f6SSatish Balay   }
170491d316f6SSatish Balay   *flg = PETSC_TRUE;
170591d316f6SSatish Balay   return 0;
170691d316f6SSatish Balay 
170791d316f6SSatish Balay }
170891d316f6SSatish Balay 
17095615d1e5SSatish Balay #undef __FUNC__
17105615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17118f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
171291d316f6SSatish Balay {
171391d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17147e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
171517e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
171617e48fc4SSatish Balay 
17170513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
171817e48fc4SSatish Balay   bs   = a->bs;
171917e48fc4SSatish Balay   aa   = a->a;
172017e48fc4SSatish Balay   ai   = a->i;
172117e48fc4SSatish Balay   aj   = a->j;
172217e48fc4SSatish Balay   ambs = a->mbs;
17239df24120SSatish Balay   bs2  = a->bs2;
172491d316f6SSatish Balay 
172591d316f6SSatish Balay   VecSet(&zero,v);
172690f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1727e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
172817e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
172917e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
173017e48fc4SSatish Balay       if (aj[j] == i) {
173117e48fc4SSatish Balay         row  = i*bs;
17327e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17337e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
173491d316f6SSatish Balay         break;
173591d316f6SSatish Balay       }
173691d316f6SSatish Balay     }
173791d316f6SSatish Balay   }
173891d316f6SSatish Balay   return 0;
173991d316f6SSatish Balay }
174091d316f6SSatish Balay 
17415615d1e5SSatish Balay #undef __FUNC__
17425615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17438f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17449867e207SSatish Balay {
17459867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17469867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17477e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17489867e207SSatish Balay 
17499867e207SSatish Balay   ai  = a->i;
17509867e207SSatish Balay   aj  = a->j;
17519867e207SSatish Balay   aa  = a->a;
17529867e207SSatish Balay   m   = a->m;
17539867e207SSatish Balay   n   = a->n;
17549867e207SSatish Balay   bs  = a->bs;
17559867e207SSatish Balay   mbs = a->mbs;
17569df24120SSatish Balay   bs2 = a->bs2;
17579867e207SSatish Balay   if (ll) {
175890f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1759e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17609867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17619867e207SSatish Balay       M  = ai[i+1] - ai[i];
17629867e207SSatish Balay       li = l + i*bs;
17637e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17649867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17657e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17669867e207SSatish Balay           (*v++) *= li[k%bs];
17679867e207SSatish Balay         }
17689867e207SSatish Balay       }
17699867e207SSatish Balay     }
17709867e207SSatish Balay   }
17719867e207SSatish Balay 
17729867e207SSatish Balay   if (rr) {
177390f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1774e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17759867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17769867e207SSatish Balay       M  = ai[i+1] - ai[i];
17777e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17789867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17799867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17809867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17819867e207SSatish Balay           x = ri[k];
17829867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17839867e207SSatish Balay         }
17849867e207SSatish Balay       }
17859867e207SSatish Balay     }
17869867e207SSatish Balay   }
17879867e207SSatish Balay   return 0;
17889867e207SSatish Balay }
17899867e207SSatish Balay 
17909867e207SSatish Balay 
1791b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1792b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17932a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1794736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1795736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17961a6a6d98SBarry Smith 
17971a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17981a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
180348e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
18041a6a6d98SBarry Smith 
18057fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
18067fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18112593348eSBarry Smith 
18125615d1e5SSatish Balay #undef __FUNC__
18135615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18148f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18152593348eSBarry Smith {
1816b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18172593348eSBarry Smith   Scalar      *v = a->a;
18182593348eSBarry Smith   double      sum = 0.0;
18199df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18202593348eSBarry Smith 
18212593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18220419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18232593348eSBarry Smith #if defined(PETSC_COMPLEX)
18242593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18252593348eSBarry Smith #else
18262593348eSBarry Smith       sum += (*v)*(*v); v++;
18272593348eSBarry Smith #endif
18282593348eSBarry Smith     }
18292593348eSBarry Smith     *norm = sqrt(sum);
18302593348eSBarry Smith   }
18312593348eSBarry Smith   else {
1832e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18332593348eSBarry Smith   }
18342593348eSBarry Smith   return 0;
18352593348eSBarry Smith }
18362593348eSBarry Smith 
18373eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
18382593348eSBarry Smith /*
18392593348eSBarry Smith      note: This can only work for identity for row and col. It would
18402593348eSBarry Smith    be good to check this and otherwise generate an error.
18412593348eSBarry Smith */
18425615d1e5SSatish Balay #undef __FUNC__
18435615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18448f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18452593348eSBarry Smith {
1846b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18472593348eSBarry Smith   Mat         outA;
1848de6a44a3SBarry Smith   int         ierr;
18492593348eSBarry Smith 
1850e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18512593348eSBarry Smith 
18522593348eSBarry Smith   outA          = inA;
18532593348eSBarry Smith   inA->factor   = FACTOR_LU;
18542593348eSBarry Smith   a->row        = row;
18552593348eSBarry Smith   a->col        = col;
18562593348eSBarry Smith 
1857eed86810SBarry Smith   if (!a->solve_work) {
1858de6a44a3SBarry Smith     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1859eed86810SBarry Smith     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1860eed86810SBarry Smith   }
18612593348eSBarry Smith 
18622593348eSBarry Smith   if (!a->diag) {
1863b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18642593348eSBarry Smith   }
18657fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18663eee16b0SBarry Smith 
18673eee16b0SBarry Smith   /*
18683eee16b0SBarry Smith       Blocksize 4 has a special faster solver for ILU(0) factorization
18693eee16b0SBarry Smith     with natural ordering
18703eee16b0SBarry Smith   */
18713eee16b0SBarry Smith   if (a->bs == 4) {
18723eee16b0SBarry Smith     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
18733eee16b0SBarry Smith   }
18743eee16b0SBarry Smith 
18752593348eSBarry Smith   return 0;
18762593348eSBarry Smith }
18772593348eSBarry Smith 
18785615d1e5SSatish Balay #undef __FUNC__
18795615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
18808f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18812593348eSBarry Smith {
1882b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18839df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1884b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1885b6490206SBarry Smith   PLogFlops(totalnz);
18862593348eSBarry Smith   return 0;
18872593348eSBarry Smith }
18882593348eSBarry Smith 
18895615d1e5SSatish Balay #undef __FUNC__
18905615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18918f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18927e67e3f9SSatish Balay {
18937e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18947e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1895a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18969df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18977e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18987e67e3f9SSatish Balay 
18997e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
19007e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1901e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1902e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1903a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
19047e67e3f9SSatish Balay     nrow = ailen[brow];
19057e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1906e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1907e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1908a7c10996SSatish Balay       col  = in[l] ;
19097e67e3f9SSatish Balay       bcol = col/bs;
19107e67e3f9SSatish Balay       cidx = col%bs;
19117e67e3f9SSatish Balay       ridx = row%bs;
19127e67e3f9SSatish Balay       high = nrow;
19137e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
19147e67e3f9SSatish Balay       while (high-low > 5) {
19157e67e3f9SSatish Balay         t = (low+high)/2;
19167e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
19177e67e3f9SSatish Balay         else             low  = t;
19187e67e3f9SSatish Balay       }
19197e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
19207e67e3f9SSatish Balay         if (rp[i] > bcol) break;
19217e67e3f9SSatish Balay         if (rp[i] == bcol) {
19227e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
19237e67e3f9SSatish Balay           goto finished;
19247e67e3f9SSatish Balay         }
19257e67e3f9SSatish Balay       }
19267e67e3f9SSatish Balay       *v++ = zero;
19277e67e3f9SSatish Balay       finished:;
19287e67e3f9SSatish Balay     }
19297e67e3f9SSatish Balay   }
19307e67e3f9SSatish Balay   return 0;
19317e67e3f9SSatish Balay }
19327e67e3f9SSatish Balay 
19335615d1e5SSatish Balay #undef __FUNC__
1934d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
19358f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19365a838052SSatish Balay {
19375a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19385a838052SSatish Balay   *bs = baij->bs;
19395a838052SSatish Balay   return 0;
19405a838052SSatish Balay }
19415a838052SSatish Balay 
1942d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19435615d1e5SSatish Balay #undef __FUNC__
19445615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1945d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1946d9b7c43dSSatish Balay {
1947d9b7c43dSSatish Balay   int i,row;
1948d9b7c43dSSatish Balay   row = idx[0];
1949d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1950d9b7c43dSSatish Balay 
1951d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1952d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1953d9b7c43dSSatish Balay   }
1954d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1955d9b7c43dSSatish Balay   return 0;
1956d9b7c43dSSatish Balay }
1957d9b7c43dSSatish Balay 
19585615d1e5SSatish Balay #undef __FUNC__
19595615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19608f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1961d9b7c43dSSatish Balay {
1962d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1963d9b7c43dSSatish Balay   IS          is_local;
1964d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1965d9b7c43dSSatish Balay   PetscTruth  flg;
1966d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1967d9b7c43dSSatish Balay 
1968d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1969d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1970d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1971537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1972d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1973d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1974d9b7c43dSSatish Balay 
1975d9b7c43dSSatish Balay   i = 0;
1976d9b7c43dSSatish Balay   while (i < is_n) {
1977e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1978d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1979d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1980d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1981d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1982d9b7c43dSSatish Balay       i += bs;
1983d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1984d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1985d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1986d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1987d9b7c43dSSatish Balay         aa[0] = zero;
1988d9b7c43dSSatish Balay         aa+=bs;
1989d9b7c43dSSatish Balay       }
1990d9b7c43dSSatish Balay       i++;
1991d9b7c43dSSatish Balay     }
1992d9b7c43dSSatish Balay   }
1993d9b7c43dSSatish Balay   if (diag) {
1994d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1995d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1996d9b7c43dSSatish Balay     }
1997d9b7c43dSSatish Balay   }
1998d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1999d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2000d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
20019a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2002d9b7c43dSSatish Balay 
2003d9b7c43dSSatish Balay   return 0;
2004d9b7c43dSSatish Balay }
20051c351548SSatish Balay 
20065615d1e5SSatish Balay #undef __FUNC__
2007d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2008ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
2009ba4ca20aSSatish Balay {
2010ba4ca20aSSatish Balay   static int called = 0;
2011ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
2012ba4ca20aSSatish Balay 
2013ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
2014ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2015ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2016ba4ca20aSSatish Balay   return 0;
2017ba4ca20aSSatish Balay }
2018d9b7c43dSSatish Balay 
20192593348eSBarry Smith /* -------------------------------------------------------------------*/
2020cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
20219867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2022f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2023faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20241a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2025b6490206SBarry Smith        0,0,
2026de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2027b6490206SBarry Smith        0,
2028f2501298SSatish Balay        MatTranspose_SeqBAIJ,
202917e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20309867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2031584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2032b6490206SBarry Smith        0,
2033d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20347fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2035b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2036de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2037d402145bSBarry Smith        0,0,
2038b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2039b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2040af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20417e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2042ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20433b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20443b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
204592c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
204692c4ed94SBarry Smith        0,0,0,0,0,0,
204792c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20482593348eSBarry Smith 
20495615d1e5SSatish Balay #undef __FUNC__
20505615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20512593348eSBarry Smith /*@C
205244cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
205344cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
205444cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20552bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20562bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20572593348eSBarry Smith 
20582593348eSBarry Smith    Input Parameters:
2059029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2060b6490206SBarry Smith .  bs - size of block
20612593348eSBarry Smith .  m - number of rows
20622593348eSBarry Smith .  n - number of columns
2063b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20642bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20652bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20662593348eSBarry Smith 
20672593348eSBarry Smith    Output Parameter:
20682593348eSBarry Smith .  A - the matrix
20692593348eSBarry Smith 
20700513a670SBarry Smith    Options Database Keys:
20710513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
2072*0effe34fSLois Curfman McInnes $                     block calculations (much slower)
20730513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20740513a670SBarry Smith 
20752593348eSBarry Smith    Notes:
207644cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20772593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
207844cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20792593348eSBarry Smith 
20802593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20812593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20822593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20836da5968aSLois Curfman McInnes    matrices.
20842593348eSBarry Smith 
208544cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20862593348eSBarry Smith @*/
2087b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20882593348eSBarry Smith {
20892593348eSBarry Smith   Mat         B;
2090b6490206SBarry Smith   Mat_SeqBAIJ *b;
20913b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20923b2fbd54SBarry Smith 
20933b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2094e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2095b6490206SBarry Smith 
20960513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20970513a670SBarry Smith 
2098f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2099e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
21002593348eSBarry Smith 
21012593348eSBarry Smith   *A = 0;
2102d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
21032593348eSBarry Smith   PLogObjectCreate(B);
2104b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
210544cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
21062593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
21071a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
21081a6a6d98SBarry Smith   if (!flg) {
21097fc0212eSBarry Smith     switch (bs) {
21107fc0212eSBarry Smith     case 1:
21117fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21121a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
211339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2114f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
21157fc0212eSBarry Smith       break;
21164eeb42bcSBarry Smith     case 2:
21174eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
21181a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
211939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2120f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
21216d84be18SBarry Smith       break;
2122f327dd97SBarry Smith     case 3:
2123f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21241a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
212539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2126f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21274eeb42bcSBarry Smith       break;
21286d84be18SBarry Smith     case 4:
21296d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21301a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
213139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2132f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21336d84be18SBarry Smith       break;
21346d84be18SBarry Smith     case 5:
21356d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21361a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
213739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2138f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21396d84be18SBarry Smith       break;
214048e9ddb2SSatish Balay     case 7:
214148e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
214248e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
214348e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
214448e9ddb2SSatish Balay       break;
21457fc0212eSBarry Smith     }
21461a6a6d98SBarry Smith   }
2147b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2148b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21492593348eSBarry Smith   B->factor           = 0;
21502593348eSBarry Smith   B->lupivotthreshold = 1.0;
215190f02eecSBarry Smith   B->mapping          = 0;
21522593348eSBarry Smith   b->row              = 0;
21532593348eSBarry Smith   b->col              = 0;
21542593348eSBarry Smith   b->reallocs         = 0;
21552593348eSBarry Smith 
215644cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
215744cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2158b6490206SBarry Smith   b->mbs     = mbs;
2159f2501298SSatish Balay   b->nbs     = nbs;
2160b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21612593348eSBarry Smith   if (nnz == PETSC_NULL) {
2162119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21632593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2164b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2165b6490206SBarry Smith     nz = nz*mbs;
21662593348eSBarry Smith   }
21672593348eSBarry Smith   else {
21682593348eSBarry Smith     nz = 0;
2169b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21702593348eSBarry Smith   }
21712593348eSBarry Smith 
21722593348eSBarry Smith   /* allocate the matrix space */
21737e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21742593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21757e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21767e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21772593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21782593348eSBarry Smith   b->i  = b->j + nz;
21792593348eSBarry Smith   b->singlemalloc = 1;
21802593348eSBarry Smith 
2181de6a44a3SBarry Smith   b->i[0] = 0;
2182b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21832593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21842593348eSBarry Smith   }
21852593348eSBarry Smith 
2186b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2187b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2188f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2189b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21902593348eSBarry Smith 
2191b6490206SBarry Smith   b->bs               = bs;
21929df24120SSatish Balay   b->bs2              = bs2;
2193b6490206SBarry Smith   b->mbs              = mbs;
21942593348eSBarry Smith   b->nz               = 0;
219518e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21962593348eSBarry Smith   b->sorted           = 0;
21972593348eSBarry Smith   b->roworiented      = 1;
21982593348eSBarry Smith   b->nonew            = 0;
21992593348eSBarry Smith   b->diag             = 0;
22002593348eSBarry Smith   b->solve_work       = 0;
2201de6a44a3SBarry Smith   b->mult_work        = 0;
22022593348eSBarry Smith   b->spptr            = 0;
22034e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
22044e220ebcSLois Curfman McInnes 
22052593348eSBarry Smith   *A = B;
22062593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
22072593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
22082593348eSBarry Smith   return 0;
22092593348eSBarry Smith }
22102593348eSBarry Smith 
22115615d1e5SSatish Balay #undef __FUNC__
22125615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2213b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
22142593348eSBarry Smith {
22152593348eSBarry Smith   Mat         C;
2216b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
22179df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2218de6a44a3SBarry Smith 
2219e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
22202593348eSBarry Smith 
22212593348eSBarry Smith   *B = 0;
2222d4bb536fSBarry Smith   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
22232593348eSBarry Smith   PLogObjectCreate(C);
2224b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22252593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2226b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2227b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22282593348eSBarry Smith   C->factor     = A->factor;
22292593348eSBarry Smith   c->row        = 0;
22302593348eSBarry Smith   c->col        = 0;
22312593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22322593348eSBarry Smith 
223344cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
223444cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
223544cd7ae7SLois Curfman McInnes   C->M          = a->m;
223644cd7ae7SLois Curfman McInnes   C->N          = a->n;
223744cd7ae7SLois Curfman McInnes 
2238b6490206SBarry Smith   c->bs         = a->bs;
22399df24120SSatish Balay   c->bs2        = a->bs2;
2240b6490206SBarry Smith   c->mbs        = a->mbs;
2241b6490206SBarry Smith   c->nbs        = a->nbs;
22422593348eSBarry Smith 
2243b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2244b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2245b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22462593348eSBarry Smith     c->imax[i] = a->imax[i];
22472593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22482593348eSBarry Smith   }
22492593348eSBarry Smith 
22502593348eSBarry Smith   /* allocate the matrix space */
22512593348eSBarry Smith   c->singlemalloc = 1;
22527e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22532593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22547e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2255de6a44a3SBarry Smith   c->i  = c->j + nz;
2256b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2257b6490206SBarry Smith   if (mbs > 0) {
2258de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22592593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22607e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22612593348eSBarry Smith     }
22622593348eSBarry Smith   }
22632593348eSBarry Smith 
2264f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
22652593348eSBarry Smith   c->sorted      = a->sorted;
22662593348eSBarry Smith   c->roworiented = a->roworiented;
22672593348eSBarry Smith   c->nonew       = a->nonew;
22682593348eSBarry Smith 
22692593348eSBarry Smith   if (a->diag) {
2270b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2271b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2272b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22732593348eSBarry Smith       c->diag[i] = a->diag[i];
22742593348eSBarry Smith     }
22752593348eSBarry Smith   }
22762593348eSBarry Smith   else c->diag          = 0;
22772593348eSBarry Smith   c->nz                 = a->nz;
22782593348eSBarry Smith   c->maxnz              = a->maxnz;
22792593348eSBarry Smith   c->solve_work         = 0;
22802593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22817fc0212eSBarry Smith   c->mult_work          = 0;
22822593348eSBarry Smith   *B = C;
22832593348eSBarry Smith   return 0;
22842593348eSBarry Smith }
22852593348eSBarry Smith 
22865615d1e5SSatish Balay #undef __FUNC__
22875615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
228819bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22892593348eSBarry Smith {
2290b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22912593348eSBarry Smith   Mat          B;
2292de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2293b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
229435aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2295a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2296b6490206SBarry Smith   Scalar       *aa;
229719bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22982593348eSBarry Smith 
22991a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2300de6a44a3SBarry Smith   bs2  = bs*bs;
2301b6490206SBarry Smith 
23022593348eSBarry Smith   MPI_Comm_size(comm,&size);
2303e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
230490ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
230577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2306e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
23072593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
23082593348eSBarry Smith 
2309e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
231035aab85fSBarry Smith 
231135aab85fSBarry Smith   /*
231235aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
231335aab85fSBarry Smith     divisible by the blocksize
231435aab85fSBarry Smith   */
2315b6490206SBarry Smith   mbs        = M/bs;
231635aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
231735aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
231835aab85fSBarry Smith   else                  mbs++;
231935aab85fSBarry Smith   if (extra_rows) {
2320537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
232135aab85fSBarry Smith   }
2322b6490206SBarry Smith 
23232593348eSBarry Smith   /* read in row lengths */
232435aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
232577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
232635aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23272593348eSBarry Smith 
2328b6490206SBarry Smith   /* read in column indices */
232935aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
233077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
233135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2332b6490206SBarry Smith 
2333b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2334b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2335b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
233635aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
233735aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
233835aab85fSBarry Smith   masked = mask + mbs;
2339b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2340b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
234135aab85fSBarry Smith     nmask = 0;
2342b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2343b6490206SBarry Smith       kmax = rowlengths[rowcount];
2344b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
234535aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
234635aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2347b6490206SBarry Smith       }
2348b6490206SBarry Smith       rowcount++;
2349b6490206SBarry Smith     }
235035aab85fSBarry Smith     browlengths[i] += nmask;
235135aab85fSBarry Smith     /* zero out the mask elements we set */
235235aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2353b6490206SBarry Smith   }
2354b6490206SBarry Smith 
23552593348eSBarry Smith   /* create our matrix */
23563eee16b0SBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
23572593348eSBarry Smith   B = *A;
2358b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23592593348eSBarry Smith 
23602593348eSBarry Smith   /* set matrix "i" values */
2361de6a44a3SBarry Smith   a->i[0] = 0;
2362b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2363b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2364b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23652593348eSBarry Smith   }
2366b6490206SBarry Smith   a->nz         = 0;
2367b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23682593348eSBarry Smith 
2369b6490206SBarry Smith   /* read in nonzero values */
237035aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
237177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
237235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2373b6490206SBarry Smith 
2374b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2375b6490206SBarry Smith   nzcount = 0; jcount = 0;
2376b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2377b6490206SBarry Smith     nzcountb = nzcount;
237835aab85fSBarry Smith     nmask    = 0;
2379b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2380b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2381b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
238235aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
238335aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2384b6490206SBarry Smith       }
2385b6490206SBarry Smith       rowcount++;
2386b6490206SBarry Smith     }
2387de6a44a3SBarry Smith     /* sort the masked values */
238877c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2389de6a44a3SBarry Smith 
2390b6490206SBarry Smith     /* set "j" values into matrix */
2391b6490206SBarry Smith     maskcount = 1;
239235aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
239335aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2394de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2395b6490206SBarry Smith     }
2396b6490206SBarry Smith     /* set "a" values into matrix */
2397de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2398b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2399b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2400b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2401de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2402de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2403de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2404de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2405b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2406b6490206SBarry Smith       }
2407b6490206SBarry Smith     }
240835aab85fSBarry Smith     /* zero out the mask elements we set */
240935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2410b6490206SBarry Smith   }
2411e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2412b6490206SBarry Smith 
2413b6490206SBarry Smith   PetscFree(rowlengths);
2414b6490206SBarry Smith   PetscFree(browlengths);
2415b6490206SBarry Smith   PetscFree(aa);
2416b6490206SBarry Smith   PetscFree(jj);
2417b6490206SBarry Smith   PetscFree(mask);
2418b6490206SBarry Smith 
2419b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2420de6a44a3SBarry Smith 
2421de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2422de6a44a3SBarry Smith   if (flg) {
242319bcc07fSBarry Smith     Viewer tviewer;
242419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2425639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
242619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
242719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2428de6a44a3SBarry Smith   }
2429de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2430de6a44a3SBarry Smith   if (flg) {
243119bcc07fSBarry Smith     Viewer tviewer;
243219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2433639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
243419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
243519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2436de6a44a3SBarry Smith   }
2437de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2438de6a44a3SBarry Smith   if (flg) {
243919bcc07fSBarry Smith     Viewer tviewer;
244019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
244119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
244219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2443de6a44a3SBarry Smith   }
2444de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2445de6a44a3SBarry Smith   if (flg) {
244619bcc07fSBarry Smith     Viewer tviewer;
244719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2448639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
244919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
245019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2451de6a44a3SBarry Smith   }
2452de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2453de6a44a3SBarry Smith   if (flg) {
245419bcc07fSBarry Smith     Viewer tviewer;
245519bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
245619bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
245719bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
245819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2459de6a44a3SBarry Smith   }
24602593348eSBarry Smith   return 0;
24612593348eSBarry Smith }
24622593348eSBarry Smith 
24632593348eSBarry Smith 
24642593348eSBarry Smith 
2465