xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cc0fae110111c7ed8c32c86910770502c6cc577e)
19c01be13SBarry Smith 
2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
3*cc0fae11SBarry Smith static char vcid[] = "$Id: baij.c,v 1.118 1997/12/01 01:55:02 bsmith Exp bsmith $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
103369ce9aSBarry Smith 
113369ce9aSBarry Smith #include "pinclude/pviewer.h"
123369ce9aSBarry Smith #include "sys.h"
1370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
141a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
151a6a6d98SBarry Smith #include "src/inline/spops.h"
162593348eSBarry Smith #include "petsc.h"
173b547af2SSatish Balay 
182593348eSBarry Smith 
19de6a44a3SBarry Smith /*
20de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
21de6a44a3SBarry Smith */
22de6a44a3SBarry Smith 
235615d1e5SSatish Balay #undef __FUNC__
245615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
25de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
26de6a44a3SBarry Smith {
27de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
287fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
29de6a44a3SBarry Smith 
303a40ed3dSBarry Smith   PetscFunctionBegin;
31de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
32de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
337fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
34de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
35de6a44a3SBarry Smith       if (a->j[j] == i) {
36de6a44a3SBarry Smith         diag[i] = j;
37de6a44a3SBarry Smith         break;
38de6a44a3SBarry Smith       }
39de6a44a3SBarry Smith     }
40de6a44a3SBarry Smith   }
41de6a44a3SBarry Smith   a->diag = diag;
423a40ed3dSBarry Smith   PetscFunctionReturn(0);
43de6a44a3SBarry Smith }
442593348eSBarry Smith 
452593348eSBarry Smith 
463b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
473b2fbd54SBarry Smith 
485615d1e5SSatish Balay #undef __FUNC__
495615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
503b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
513b2fbd54SBarry Smith                             PetscTruth *done)
523b2fbd54SBarry Smith {
533b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
543b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
553b2fbd54SBarry Smith 
563a40ed3dSBarry Smith   PetscFunctionBegin;
573b2fbd54SBarry Smith   *nn = n;
583a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
593b2fbd54SBarry Smith   if (symmetric) {
603b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
613b2fbd54SBarry Smith   } else if (oshift == 1) {
623b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
633b2fbd54SBarry Smith     int nz = a->i[n] + 1;
643b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
653b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
663b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
673b2fbd54SBarry Smith   } else {
683b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
693b2fbd54SBarry Smith   }
703b2fbd54SBarry Smith 
713a40ed3dSBarry Smith   PetscFunctionReturn(0);
723b2fbd54SBarry Smith }
733b2fbd54SBarry Smith 
745615d1e5SSatish Balay #undef __FUNC__
75d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
763b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
773b2fbd54SBarry Smith                                 PetscTruth *done)
783b2fbd54SBarry Smith {
793b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
803b2fbd54SBarry Smith   int         i,n = a->mbs;
813b2fbd54SBarry Smith 
823a40ed3dSBarry Smith   PetscFunctionBegin;
833a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
843b2fbd54SBarry Smith   if (symmetric) {
853b2fbd54SBarry Smith     PetscFree(*ia);
863b2fbd54SBarry Smith     PetscFree(*ja);
87af5da2bfSBarry Smith   } else if (oshift == 1) {
883b2fbd54SBarry Smith     int nz = a->i[n];
893b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
903b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
913b2fbd54SBarry Smith   }
923a40ed3dSBarry Smith   PetscFunctionReturn(0);
933b2fbd54SBarry Smith }
943b2fbd54SBarry Smith 
953b2fbd54SBarry Smith 
965615d1e5SSatish Balay #undef __FUNC__
97d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary"
98b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
992593348eSBarry Smith {
100b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1019df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102b6490206SBarry Smith   Scalar      *aa;
1032593348eSBarry Smith 
1043a40ed3dSBarry Smith   PetscFunctionBegin;
10590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1062593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1072593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1083b2fbd54SBarry Smith 
1092593348eSBarry Smith   col_lens[1] = a->m;
1102593348eSBarry Smith   col_lens[2] = a->n;
1117e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1122593348eSBarry Smith 
1132593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
114b6490206SBarry Smith   count = 0;
115b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
116b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
117b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118b6490206SBarry Smith     }
1192593348eSBarry Smith   }
1200752156aSBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
1212593348eSBarry Smith   PetscFree(col_lens);
1222593348eSBarry Smith 
1232593348eSBarry Smith   /* store column indices (zero start index) */
12466963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
125b6490206SBarry Smith   count = 0;
126b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
127b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
128b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
130b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1312593348eSBarry Smith         }
1322593348eSBarry Smith       }
133b6490206SBarry Smith     }
134b6490206SBarry Smith   }
1350752156aSBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
136b6490206SBarry Smith   PetscFree(jj);
1372593348eSBarry Smith 
1382593348eSBarry Smith   /* store nonzero values */
13966963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140b6490206SBarry Smith   count = 0;
141b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
142b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
143b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1457e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
146b6490206SBarry Smith         }
147b6490206SBarry Smith       }
148b6490206SBarry Smith     }
149b6490206SBarry Smith   }
1500752156aSBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
151b6490206SBarry Smith   PetscFree(aa);
1523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1532593348eSBarry Smith }
1542593348eSBarry Smith 
1555615d1e5SSatish Balay #undef __FUNC__
156d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII"
157b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1582593348eSBarry Smith {
159b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1609df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1612593348eSBarry Smith   FILE        *fd;
1622593348eSBarry Smith   char        *outputname;
1632593348eSBarry Smith 
1643a40ed3dSBarry Smith   PetscFunctionBegin;
16590ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1662593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16790ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
168639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1697ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1702593348eSBarry Smith   }
171639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
172a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported");
1732593348eSBarry Smith   }
174639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
17544cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17644cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17744cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17844cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17944cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
1803a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
181766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
18344cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
184766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
185766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
186766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
18744cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18844cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18944cd7ae7SLois Curfman McInnes #else
19044cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
19144cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
19244cd7ae7SLois Curfman McInnes #endif
19344cd7ae7SLois Curfman McInnes           }
19444cd7ae7SLois Curfman McInnes         }
19544cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19644cd7ae7SLois Curfman McInnes       }
19744cd7ae7SLois Curfman McInnes     }
19844cd7ae7SLois Curfman McInnes   }
1992593348eSBarry Smith   else {
200b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
201b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
202b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
203b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
204b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
2053a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
206766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
20788685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2087e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20988685aaeSLois Curfman McInnes           }
210766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
211766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
212766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
213766eeae4SLois Curfman McInnes           }
21488685aaeSLois Curfman McInnes           else {
2157e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
21688685aaeSLois Curfman McInnes           }
21788685aaeSLois Curfman McInnes #else
2187e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
21988685aaeSLois Curfman McInnes #endif
2202593348eSBarry Smith           }
2212593348eSBarry Smith         }
2222593348eSBarry Smith         fprintf(fd,"\n");
2232593348eSBarry Smith       }
2242593348eSBarry Smith     }
225b6490206SBarry Smith   }
2262593348eSBarry Smith   fflush(fd);
2273a40ed3dSBarry Smith   PetscFunctionReturn(0);
2282593348eSBarry Smith }
2292593348eSBarry Smith 
2305615d1e5SSatish Balay #undef __FUNC__
231d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw"
2323270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2333270192aSSatish Balay {
2343270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2353270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2363270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2373270192aSSatish Balay   Scalar       *aa;
2383270192aSSatish Balay   Draw         draw;
2393270192aSSatish Balay   DrawButton   button;
2403270192aSSatish Balay   PetscTruth   isnull;
2413270192aSSatish Balay 
2423a40ed3dSBarry Smith   PetscFunctionBegin;
2433a40ed3dSBarry Smith   ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr);
2443a40ed3dSBarry Smith   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
2453270192aSSatish Balay 
2463270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2473270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2483270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2493270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2503270192aSSatish Balay   color = DRAW_BLUE;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay   color = DRAW_CYAN;
2653270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2663270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2673270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2683270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2693270192aSSatish Balay       aa = a->a + j*bs2;
2703270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2713270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2723270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
273b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2743270192aSSatish Balay         }
2753270192aSSatish Balay       }
2763270192aSSatish Balay     }
2773270192aSSatish Balay   }
2783270192aSSatish Balay 
2793270192aSSatish Balay   color = DRAW_RED;
2803270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2813270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2823270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2833270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2843270192aSSatish Balay       aa = a->a + j*bs2;
2853270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2863270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2873270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
288b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2893270192aSSatish Balay         }
2903270192aSSatish Balay       }
2913270192aSSatish Balay     }
2923270192aSSatish Balay   }
2933270192aSSatish Balay 
29455843e3eSBarry Smith   DrawSynchronizedFlush(draw);
2953270192aSSatish Balay   DrawGetPause(draw,&pause);
2963a40ed3dSBarry Smith   if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);}
2973270192aSSatish Balay 
2983270192aSSatish Balay   /* allow the matrix to zoom or shrink */
29955843e3eSBarry Smith   ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0);
3003270192aSSatish Balay   while (button != BUTTON_RIGHT) {
30155843e3eSBarry Smith     DrawSynchronizedClear(draw);
3023270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
3033270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
3043270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
3053270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
3063270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
3073270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
3083270192aSSatish Balay     w *= scale; h *= scale;
3093270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
3103270192aSSatish Balay     color = DRAW_BLUE;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
319b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay     color = DRAW_CYAN;
3253270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3263270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3273270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3283270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3293270192aSSatish Balay         aa = a->a + j*bs2;
3303270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3313270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3323270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
333b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3343270192aSSatish Balay           }
3353270192aSSatish Balay         }
3363270192aSSatish Balay       }
3373270192aSSatish Balay     }
3383270192aSSatish Balay 
3393270192aSSatish Balay     color = DRAW_RED;
3403270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3413270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3423270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3433270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3443270192aSSatish Balay         aa = a->a + j*bs2;
3453270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3463270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3473270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
348b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3493270192aSSatish Balay           }
3503270192aSSatish Balay         }
3513270192aSSatish Balay       }
3523270192aSSatish Balay     }
35355843e3eSBarry Smith     ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0);
3543270192aSSatish Balay   }
3553a40ed3dSBarry Smith   PetscFunctionReturn(0);
3563270192aSSatish Balay }
3573270192aSSatish Balay 
3585615d1e5SSatish Balay #undef __FUNC__
359d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ"
3608f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3612593348eSBarry Smith {
3622593348eSBarry Smith   Mat         A = (Mat) obj;
36319bcc07fSBarry Smith   ViewerType  vtype;
36419bcc07fSBarry Smith   int         ierr;
3652593348eSBarry Smith 
3663a40ed3dSBarry Smith   PetscFunctionBegin;
36719bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36819bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
369a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"Matlab viewer not supported");
3703a40ed3dSBarry Smith   } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
3713a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
3723a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
3733a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
3743a40ed3dSBarry Smith   } else if (vtype == DRAW_VIEWER) {
3753a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
3762593348eSBarry Smith   }
3773a40ed3dSBarry Smith   PetscFunctionReturn(0);
3782593348eSBarry Smith }
379b6490206SBarry Smith 
380119a934aSSatish Balay #define CHUNKSIZE  10
381cd0e1443SSatish Balay 
3825615d1e5SSatish Balay #undef __FUNC__
3835615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
384639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
385cd0e1443SSatish Balay {
386cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
387cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
388cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
389a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3909df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
391cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
392cd0e1443SSatish Balay 
3933a40ed3dSBarry Smith   PetscFunctionBegin;
394cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
395cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3963a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
397a8c6a408SBarry Smith     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
398a8c6a408SBarry Smith     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
3993b2fbd54SBarry Smith #endif
4002c3acbe9SBarry Smith     rp   = aj + ai[brow];
4012c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
4022c3acbe9SBarry Smith     rmax = imax[brow];
4032c3acbe9SBarry Smith     nrow = ailen[brow];
404cd0e1443SSatish Balay     low  = 0;
405cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
4063a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
407a8c6a408SBarry Smith       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
408a8c6a408SBarry Smith       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
4093b2fbd54SBarry Smith #endif
410a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
411cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
412cd0e1443SSatish Balay       if (roworiented) {
413cd0e1443SSatish Balay         value = *v++;
4143a40ed3dSBarry Smith       } else {
415cd0e1443SSatish Balay         value = v[k + l*m];
416cd0e1443SSatish Balay       }
417cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4182c3acbe9SBarry Smith       while (high-low > 7) {
419cd0e1443SSatish Balay         t = (low+high)/2;
420cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
421cd0e1443SSatish Balay         else              low  = t;
422cd0e1443SSatish Balay       }
423cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
424cd0e1443SSatish Balay         if (rp[i] > bcol) break;
425cd0e1443SSatish Balay         if (rp[i] == bcol) {
4267e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
427cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
428cd0e1443SSatish Balay           else                  *bap  = value;
429f1241b54SBarry Smith           goto noinsert1;
430cd0e1443SSatish Balay         }
431cd0e1443SSatish Balay       }
432c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
433a8c6a408SBarry Smith       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
434cd0e1443SSatish Balay       if (nrow >= rmax) {
435cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
436cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
437cd0e1443SSatish Balay         Scalar *new_a;
438cd0e1443SSatish Balay 
439a8c6a408SBarry Smith         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
44096854ed6SLois Curfman McInnes 
44196854ed6SLois Curfman McInnes         /* Malloc new storage space */
4427e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
443cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4447e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
445cd0e1443SSatish Balay         new_i   = new_j + new_nz;
446cd0e1443SSatish Balay 
447cd0e1443SSatish Balay         /* copy over old data into new slots */
448cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4497e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
450a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
451a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
452a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
453cd0e1443SSatish Balay                                                            len*sizeof(int));
454a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
455a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
456a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
457a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
458cd0e1443SSatish Balay         /* free up old matrix storage */
459cd0e1443SSatish Balay         PetscFree(a->a);
460cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
461cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
462cd0e1443SSatish Balay         a->singlemalloc = 1;
463cd0e1443SSatish Balay 
464a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
465cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4667e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
46718e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
468cd0e1443SSatish Balay         a->reallocs++;
469119a934aSSatish Balay         a->nz++;
470cd0e1443SSatish Balay       }
4717e67e3f9SSatish Balay       N = nrow++ - 1;
472cd0e1443SSatish Balay       /* shift up all the later entries in this row */
473cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
474cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4757e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
476cd0e1443SSatish Balay       }
4777e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
478cd0e1443SSatish Balay       rp[i]                      = bcol;
4797e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
480f1241b54SBarry Smith       noinsert1:;
481cd0e1443SSatish Balay       low = i;
482cd0e1443SSatish Balay     }
483cd0e1443SSatish Balay     ailen[brow] = nrow;
484cd0e1443SSatish Balay   }
4853a40ed3dSBarry Smith   PetscFunctionReturn(0);
486cd0e1443SSatish Balay }
487cd0e1443SSatish Balay 
4885615d1e5SSatish Balay #undef __FUNC__
48905a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
49092c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
49192c4ed94SBarry Smith {
49292c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4938a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
494dd9472c6SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
495dd9472c6SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
4960e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
49792c4ed94SBarry Smith 
4983a40ed3dSBarry Smith   PetscFunctionBegin;
4990e324ae4SSatish Balay   if (roworiented) {
5000e324ae4SSatish Balay     stepval = (n-1)*bs;
5010e324ae4SSatish Balay   } else {
5020e324ae4SSatish Balay     stepval = (m-1)*bs;
5030e324ae4SSatish Balay   }
50492c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
50592c4ed94SBarry Smith     row  = im[k];
5063a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
507a8c6a408SBarry Smith     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
508a8c6a408SBarry Smith     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
50992c4ed94SBarry Smith #endif
51092c4ed94SBarry Smith     rp   = aj + ai[row];
51192c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
51292c4ed94SBarry Smith     rmax = imax[row];
51392c4ed94SBarry Smith     nrow = ailen[row];
51492c4ed94SBarry Smith     low  = 0;
51592c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
5163a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
517a8c6a408SBarry Smith       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
518a8c6a408SBarry Smith       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
51992c4ed94SBarry Smith #endif
52092c4ed94SBarry Smith       col = in[l];
52192c4ed94SBarry Smith       if (roworiented) {
5220e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5230e324ae4SSatish Balay       } else {
5240e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
52592c4ed94SBarry Smith       }
52692c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
52792c4ed94SBarry Smith       while (high-low > 7) {
52892c4ed94SBarry Smith         t = (low+high)/2;
52992c4ed94SBarry Smith         if (rp[t] > col) high = t;
53092c4ed94SBarry Smith         else             low  = t;
53192c4ed94SBarry Smith       }
53292c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
53392c4ed94SBarry Smith         if (rp[i] > col) break;
53492c4ed94SBarry Smith         if (rp[i] == col) {
5358a84c255SSatish Balay           bap  = ap +  bs2*i;
5360e324ae4SSatish Balay           if (roworiented) {
5378a84c255SSatish Balay             if (is == ADD_VALUES) {
538dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
539dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5408a84c255SSatish Balay                   bap[jj] += *value++;
541dd9472c6SBarry Smith                 }
542dd9472c6SBarry Smith               }
5430e324ae4SSatish Balay             } else {
544dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
545dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5460e324ae4SSatish Balay                   bap[jj] = *value++;
5478a84c255SSatish Balay                 }
548dd9472c6SBarry Smith               }
549dd9472c6SBarry Smith             }
5500e324ae4SSatish Balay           } else {
5510e324ae4SSatish Balay             if (is == ADD_VALUES) {
552dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
553dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5540e324ae4SSatish Balay                   *bap++ += *value++;
555dd9472c6SBarry Smith                 }
556dd9472c6SBarry Smith               }
5570e324ae4SSatish Balay             } else {
558dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
559dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5600e324ae4SSatish Balay                   *bap++  = *value++;
5610e324ae4SSatish Balay                 }
5628a84c255SSatish Balay               }
563dd9472c6SBarry Smith             }
564dd9472c6SBarry Smith           }
565f1241b54SBarry Smith           goto noinsert2;
56692c4ed94SBarry Smith         }
56792c4ed94SBarry Smith       }
56889280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
569a8c6a408SBarry Smith       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
57092c4ed94SBarry Smith       if (nrow >= rmax) {
57192c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
57292c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
57392c4ed94SBarry Smith         Scalar *new_a;
57492c4ed94SBarry Smith 
575a8c6a408SBarry Smith         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
57689280ab3SLois Curfman McInnes 
57792c4ed94SBarry Smith         /* malloc new storage space */
57892c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
57992c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
58092c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
58192c4ed94SBarry Smith         new_i   = new_j + new_nz;
58292c4ed94SBarry Smith 
58392c4ed94SBarry Smith         /* copy over old data into new slots */
58492c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
58592c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
58692c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
58792c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
588dd9472c6SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
58992c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
59092c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
591dd9472c6SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
59292c4ed94SBarry Smith         /* free up old matrix storage */
59392c4ed94SBarry Smith         PetscFree(a->a);
59492c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
59592c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
59692c4ed94SBarry Smith         a->singlemalloc = 1;
59792c4ed94SBarry Smith 
59892c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
59992c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
60092c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
60192c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
60292c4ed94SBarry Smith         a->reallocs++;
60392c4ed94SBarry Smith         a->nz++;
60492c4ed94SBarry Smith       }
60592c4ed94SBarry Smith       N = nrow++ - 1;
60692c4ed94SBarry Smith       /* shift up all the later entries in this row */
60792c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
60892c4ed94SBarry Smith         rp[ii+1] = rp[ii];
60992c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
61092c4ed94SBarry Smith       }
61192c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
61292c4ed94SBarry Smith       rp[i] = col;
6138a84c255SSatish Balay       bap   = ap +  bs2*i;
6140e324ae4SSatish Balay       if (roworiented) {
615dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval) {
616dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs ) {
6170e324ae4SSatish Balay             bap[jj] = *value++;
618dd9472c6SBarry Smith           }
619dd9472c6SBarry Smith         }
6200e324ae4SSatish Balay       } else {
621dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval ) {
622dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++ ) {
6230e324ae4SSatish Balay             *bap++  = *value++;
6240e324ae4SSatish Balay           }
625dd9472c6SBarry Smith         }
626dd9472c6SBarry Smith       }
627f1241b54SBarry Smith       noinsert2:;
62892c4ed94SBarry Smith       low = i;
62992c4ed94SBarry Smith     }
63092c4ed94SBarry Smith     ailen[row] = nrow;
63192c4ed94SBarry Smith   }
6323a40ed3dSBarry Smith   PetscFunctionReturn(0);
63392c4ed94SBarry Smith }
63492c4ed94SBarry Smith 
63592c4ed94SBarry Smith #undef __FUNC__
636d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ"
6378f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6380b824a48SSatish Balay {
6390b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6403a40ed3dSBarry Smith 
6413a40ed3dSBarry Smith   PetscFunctionBegin;
6420752156aSBarry Smith   if (m) *m = a->m;
6430752156aSBarry Smith   if (n) *n = a->n;
6443a40ed3dSBarry Smith   PetscFunctionReturn(0);
6450b824a48SSatish Balay }
6460b824a48SSatish Balay 
6475615d1e5SSatish Balay #undef __FUNC__
648d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
6498f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6500b824a48SSatish Balay {
6510b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6523a40ed3dSBarry Smith 
6533a40ed3dSBarry Smith   PetscFunctionBegin;
6540b824a48SSatish Balay   *m = 0; *n = a->m;
6553a40ed3dSBarry Smith   PetscFunctionReturn(0);
6560b824a48SSatish Balay }
6570b824a48SSatish Balay 
6585615d1e5SSatish Balay #undef __FUNC__
6595615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6609867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6619867e207SSatish Balay {
6629867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6637e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6649867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6659867e207SSatish Balay 
6663a40ed3dSBarry Smith   PetscFunctionBegin;
6679867e207SSatish Balay   bs  = a->bs;
6689867e207SSatish Balay   ai  = a->i;
6699867e207SSatish Balay   aj  = a->j;
6709867e207SSatish Balay   aa  = a->a;
6719df24120SSatish Balay   bs2 = a->bs2;
6729867e207SSatish Balay 
673a8c6a408SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
6749867e207SSatish Balay 
6759867e207SSatish Balay   bn  = row/bs;   /* Block number */
6769867e207SSatish Balay   bp  = row % bs; /* Block Position */
6779867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6789867e207SSatish Balay   *nz = bs*M;
6799867e207SSatish Balay 
6809867e207SSatish Balay   if (v) {
6819867e207SSatish Balay     *v = 0;
6829867e207SSatish Balay     if (*nz) {
6839867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6849867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6859867e207SSatish Balay         v_i  = *v + i*bs;
6867e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6877e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6889867e207SSatish Balay       }
6899867e207SSatish Balay     }
6909867e207SSatish Balay   }
6919867e207SSatish Balay 
6929867e207SSatish Balay   if (idx) {
6939867e207SSatish Balay     *idx = 0;
6949867e207SSatish Balay     if (*nz) {
6959867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6969867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6979867e207SSatish Balay         idx_i = *idx + i*bs;
6989867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6999867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
7009867e207SSatish Balay       }
7019867e207SSatish Balay     }
7029867e207SSatish Balay   }
7033a40ed3dSBarry Smith   PetscFunctionReturn(0);
7049867e207SSatish Balay }
7059867e207SSatish Balay 
7065615d1e5SSatish Balay #undef __FUNC__
707d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ"
7089867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
7099867e207SSatish Balay {
7103a40ed3dSBarry Smith   PetscFunctionBegin;
7119867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
7129867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
7133a40ed3dSBarry Smith   PetscFunctionReturn(0);
7149867e207SSatish Balay }
715b6490206SBarry Smith 
7165615d1e5SSatish Balay #undef __FUNC__
7175615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
7188f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
719f2501298SSatish Balay {
720f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
721f2501298SSatish Balay   Mat         C;
722f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
7239df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
724f2501298SSatish Balay   Scalar      *array=a->a;
725f2501298SSatish Balay 
7263a40ed3dSBarry Smith   PetscFunctionBegin;
727a8c6a408SBarry Smith   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
728f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
729f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
730a7c10996SSatish Balay 
731a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
732f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
733f2501298SSatish Balay   PetscFree(col);
734f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
735f2501298SSatish Balay   cols = rows + bs;
736f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
737f2501298SSatish Balay     cols[0] = i*bs;
738f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
739f2501298SSatish Balay     len = ai[i+1] - ai[i];
740f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
741f2501298SSatish Balay       rows[0] = (*aj++)*bs;
742f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
743f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
744f2501298SSatish Balay       array += bs2;
745f2501298SSatish Balay     }
746f2501298SSatish Balay   }
7471073c447SSatish Balay   PetscFree(rows);
748f2501298SSatish Balay 
7496d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7506d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
751f2501298SSatish Balay 
752f2501298SSatish Balay   if (B != PETSC_NULL) {
753f2501298SSatish Balay     *B = C;
754f2501298SSatish Balay   } else {
755f2501298SSatish Balay     /* This isn't really an in-place transpose */
756f2501298SSatish Balay     PetscFree(a->a);
757f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
758f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
759f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
760f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
761f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
762f2501298SSatish Balay     PetscFree(a);
763f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
764f2501298SSatish Balay     PetscHeaderDestroy(C);
765f2501298SSatish Balay   }
7663a40ed3dSBarry Smith   PetscFunctionReturn(0);
767f2501298SSatish Balay }
768f2501298SSatish Balay 
769f2501298SSatish Balay 
7705615d1e5SSatish Balay #undef __FUNC__
7715615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7728f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
773584200bdSSatish Balay {
774584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
775584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
776a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
77743ee02c3SBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
778584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
779584200bdSSatish Balay 
7803a40ed3dSBarry Smith   PetscFunctionBegin;
7813a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
782584200bdSSatish Balay 
78343ee02c3SBarry Smith   if (m) rmax = ailen[0];
784584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
785584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
786584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
787d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
788584200bdSSatish Balay     if (fshift) {
789a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
790584200bdSSatish Balay       N = ailen[i];
791584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
792584200bdSSatish Balay         ip[j-fshift] = ip[j];
7937e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
794584200bdSSatish Balay       }
795584200bdSSatish Balay     }
796584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
797584200bdSSatish Balay   }
798584200bdSSatish Balay   if (mbs) {
799584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
800584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
801584200bdSSatish Balay   }
802584200bdSSatish Balay   /* reset ilen and imax for each row */
803584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
804584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
805584200bdSSatish Balay   }
806a7c10996SSatish Balay   a->nz = ai[mbs];
807584200bdSSatish Balay 
808584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
809584200bdSSatish Balay   if (fshift && a->diag) {
810584200bdSSatish Balay     PetscFree(a->diag);
811584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
812584200bdSSatish Balay     a->diag = 0;
813584200bdSSatish Balay   }
8143d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
815219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
8163d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
817584200bdSSatish Balay            a->reallocs);
818d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
819e2f3b5e9SSatish Balay   a->reallocs          = 0;
8204e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
8214e220ebcSLois Curfman McInnes 
8223a40ed3dSBarry Smith   PetscFunctionReturn(0);
823584200bdSSatish Balay }
824584200bdSSatish Balay 
8255615d1e5SSatish Balay #undef __FUNC__
8265615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
8278f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
8282593348eSBarry Smith {
829b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8303a40ed3dSBarry Smith 
8313a40ed3dSBarry Smith   PetscFunctionBegin;
8329df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8333a40ed3dSBarry Smith   PetscFunctionReturn(0);
8342593348eSBarry Smith }
8352593348eSBarry Smith 
8365615d1e5SSatish Balay #undef __FUNC__
837d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ"
838b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8392593348eSBarry Smith {
8402593348eSBarry Smith   Mat         A  = (Mat) obj;
841b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8422593348eSBarry Smith 
8433a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
8442593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8452593348eSBarry Smith #endif
8462593348eSBarry Smith   PetscFree(a->a);
8472593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8482593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8492593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8502593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8512593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
852de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8532593348eSBarry Smith   PetscFree(a);
854f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
855f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8563a40ed3dSBarry Smith   PetscFunctionReturn(0);
8572593348eSBarry Smith }
8582593348eSBarry Smith 
8595615d1e5SSatish Balay #undef __FUNC__
860d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ"
8618f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8622593348eSBarry Smith {
863b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8643a40ed3dSBarry Smith 
8653a40ed3dSBarry Smith   PetscFunctionBegin;
8666d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8676d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8686d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
869219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8706d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
871c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
87296854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8736d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8746d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
875219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8766d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8776d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
87890f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8793a40ed3dSBarry Smith            op == MAT_IGNORE_OFF_PROC_ENTRIES) {
88094a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8813a40ed3dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
8823a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
8833a40ed3dSBarry Smith   } else {
8843a40ed3dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
8853a40ed3dSBarry Smith   }
8863a40ed3dSBarry Smith   PetscFunctionReturn(0);
8872593348eSBarry Smith }
8882593348eSBarry Smith 
8892593348eSBarry Smith 
8902593348eSBarry Smith /* -------------------------------------------------------*/
8912593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8922593348eSBarry Smith /* -------------------------------------------------------*/
893b6490206SBarry Smith #include "pinclude/plapack.h"
894b6490206SBarry Smith 
8955615d1e5SSatish Balay #undef __FUNC__
8965615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
89739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8982593348eSBarry Smith {
899b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
90039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
901c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
9022593348eSBarry Smith 
9033a40ed3dSBarry Smith   PetscFunctionBegin;
904c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
905c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
906b6490206SBarry Smith 
907119a934aSSatish Balay   idx   = a->j;
908119a934aSSatish Balay   v     = a->a;
909119a934aSSatish Balay   ii    = a->i;
910119a934aSSatish Balay 
911119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
912119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
913119a934aSSatish Balay     sum  = 0.0;
914119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
9151a6a6d98SBarry Smith     z[i] = sum;
916119a934aSSatish Balay   }
917c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
918c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
91939b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
9203a40ed3dSBarry Smith   PetscFunctionReturn(0);
92139b95ed1SBarry Smith }
92239b95ed1SBarry Smith 
9235615d1e5SSatish Balay #undef __FUNC__
9245615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
92539b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
92639b95ed1SBarry Smith {
92739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
92839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
92939b95ed1SBarry Smith   register Scalar x1,x2;
9303a40ed3dSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
93139b95ed1SBarry Smith 
9323a40ed3dSBarry Smith   PetscFunctionBegin;
933c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
934c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
93539b95ed1SBarry Smith 
93639b95ed1SBarry Smith   idx   = a->j;
93739b95ed1SBarry Smith   v     = a->a;
93839b95ed1SBarry Smith   ii    = a->i;
93939b95ed1SBarry Smith 
940119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
941119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
942119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
943119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
944119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
945119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
946119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
947119a934aSSatish Balay       v += 4;
948119a934aSSatish Balay     }
9491a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
950119a934aSSatish Balay     z += 2;
951119a934aSSatish Balay   }
952c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
953c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
95439b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
9553a40ed3dSBarry Smith   PetscFunctionReturn(0);
95639b95ed1SBarry Smith }
95739b95ed1SBarry Smith 
9585615d1e5SSatish Balay #undef __FUNC__
9595615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
96039b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
96139b95ed1SBarry Smith {
96239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
96339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
964c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
96539b95ed1SBarry Smith 
9663a40ed3dSBarry Smith   PetscFunctionBegin;
967c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
968c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
96939b95ed1SBarry Smith 
97039b95ed1SBarry Smith   idx   = a->j;
97139b95ed1SBarry Smith   v     = a->a;
97239b95ed1SBarry Smith   ii    = a->i;
97339b95ed1SBarry Smith 
974119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
975119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
976119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
977119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
978119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
979119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
980119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
981119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
982119a934aSSatish Balay       v += 9;
983119a934aSSatish Balay     }
9841a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
985119a934aSSatish Balay     z += 3;
986119a934aSSatish Balay   }
987c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
988c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
98939b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
9903a40ed3dSBarry Smith   PetscFunctionReturn(0);
99139b95ed1SBarry Smith }
99239b95ed1SBarry Smith 
9935615d1e5SSatish Balay #undef __FUNC__
9945615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
99539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
99639b95ed1SBarry Smith {
99739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
9983a40ed3dSBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
9993a40ed3dSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
100039b95ed1SBarry Smith 
10013a40ed3dSBarry Smith   PetscFunctionBegin;
1002c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1003c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
100439b95ed1SBarry Smith 
100539b95ed1SBarry Smith   idx   = a->j;
100639b95ed1SBarry Smith   v     = a->a;
100739b95ed1SBarry Smith   ii    = a->i;
100839b95ed1SBarry Smith 
1009119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1010119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1011119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
1012119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1013119a934aSSatish Balay       xb = x + 4*(*idx++);
1014119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1015119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1016119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1017119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1018119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1019119a934aSSatish Balay       v += 16;
1020119a934aSSatish Balay     }
10211a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1022119a934aSSatish Balay     z += 4;
1023119a934aSSatish Balay   }
1024c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1025c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
102639b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
10273a40ed3dSBarry Smith   PetscFunctionReturn(0);
102839b95ed1SBarry Smith }
102939b95ed1SBarry Smith 
10305615d1e5SSatish Balay #undef __FUNC__
10315615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
103239b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
103339b95ed1SBarry Smith {
103439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1035522c5e43SBarry Smith   register Scalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1036522c5e43SBarry Smith   register Scalar * restrict v,* restrict xb,* restrict z, * restrict x;
1037c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
103839b95ed1SBarry Smith 
1039c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1040c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
104139b95ed1SBarry Smith 
104239b95ed1SBarry Smith   idx   = a->j;
104339b95ed1SBarry Smith   v     = a->a;
104439b95ed1SBarry Smith   ii    = a->i;
104539b95ed1SBarry Smith 
1046119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1047119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1048119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1049119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1050119a934aSSatish Balay       xb = x + 5*(*idx++);
1051119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1052119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1053119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1054119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1055119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1056119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1057119a934aSSatish Balay       v += 25;
1058119a934aSSatish Balay     }
10591a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1060119a934aSSatish Balay     z += 5;
1061119a934aSSatish Balay   }
1062c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1063c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
106439b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
10653a40ed3dSBarry Smith   PetscFunctionReturn(0);
106639b95ed1SBarry Smith }
106739b95ed1SBarry Smith 
10685615d1e5SSatish Balay #undef __FUNC__
106948e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
107048e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
107148e9ddb2SSatish Balay {
107248e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
107348e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
107448e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
107548e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
107648e9ddb2SSatish Balay 
107748e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
107848e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
107948e9ddb2SSatish Balay 
108048e9ddb2SSatish Balay   idx   = a->j;
108148e9ddb2SSatish Balay   v     = a->a;
108248e9ddb2SSatish Balay   ii    = a->i;
108348e9ddb2SSatish Balay 
108448e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
108548e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
108648e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
108748e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
108848e9ddb2SSatish Balay       xb = x + 7*(*idx++);
108948e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
109048e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
109148e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
109248e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
109348e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
109448e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
109548e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
109648e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
109748e9ddb2SSatish Balay       v += 49;
109848e9ddb2SSatish Balay     }
109948e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
110048e9ddb2SSatish Balay     z += 7;
110148e9ddb2SSatish Balay   }
110248e9ddb2SSatish Balay 
110348e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
110448e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
110548e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
11063a40ed3dSBarry Smith   PetscFunctionReturn(0);
110748e9ddb2SSatish Balay }
110848e9ddb2SSatish Balay 
110948e9ddb2SSatish Balay #undef __FUNC__
11105615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
111139b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
111239b95ed1SBarry Smith {
111339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
111439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
11153a40ed3dSBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
1116c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1117c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
111839b95ed1SBarry Smith 
11193a40ed3dSBarry Smith   PetscFunctionBegin;
1120c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1121c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
112239b95ed1SBarry Smith 
112339b95ed1SBarry Smith   idx   = a->j;
112439b95ed1SBarry Smith   v     = a->a;
112539b95ed1SBarry Smith   ii    = a->i;
112639b95ed1SBarry Smith 
112739b95ed1SBarry Smith 
1128119a934aSSatish Balay   if (!a->mult_work) {
11293b547af2SSatish Balay     k = PetscMax(a->m,a->n);
11302b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1131119a934aSSatish Balay   }
1132119a934aSSatish Balay   work = a->mult_work;
1133119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1134119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1135119a934aSSatish Balay     ncols = n*bs;
1136119a934aSSatish Balay     workt = work;
1137119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1138119a934aSSatish Balay       xb = x + bs*(*idx++);
1139119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1140119a934aSSatish Balay       workt += bs;
1141119a934aSSatish Balay     }
11421a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1143119a934aSSatish Balay     v += n*bs2;
1144119a934aSSatish Balay     z += bs;
1145119a934aSSatish Balay   }
1146c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1147c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11481a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
11493a40ed3dSBarry Smith   PetscFunctionReturn(0);
1150bb42667fSSatish Balay }
1151bb42667fSSatish Balay 
11525615d1e5SSatish Balay #undef __FUNC__
11535615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1154f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1155f44d3a6dSSatish Balay {
1156f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1157f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1158c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1159f44d3a6dSSatish Balay 
11603a40ed3dSBarry Smith   PetscFunctionBegin;
1161c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1162c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1163c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1164f44d3a6dSSatish Balay 
1165f44d3a6dSSatish Balay   idx   = a->j;
1166f44d3a6dSSatish Balay   v     = a->a;
1167f44d3a6dSSatish Balay   ii    = a->i;
1168f44d3a6dSSatish Balay 
1169f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1170f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1171f44d3a6dSSatish Balay     sum  = y[i];
1172f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1173f44d3a6dSSatish Balay     z[i] = sum;
1174f44d3a6dSSatish Balay   }
1175c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1176c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1177c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1178f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
11793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1180f44d3a6dSSatish Balay }
1181f44d3a6dSSatish Balay 
11825615d1e5SSatish Balay #undef __FUNC__
11835615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1184f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1185f44d3a6dSSatish Balay {
1186f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1187f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1188f44d3a6dSSatish Balay   register Scalar x1,x2;
11893a40ed3dSBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1190f44d3a6dSSatish Balay 
11913a40ed3dSBarry Smith   PetscFunctionBegin;
1192c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1193c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1194c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1195f44d3a6dSSatish Balay 
1196f44d3a6dSSatish Balay   idx   = a->j;
1197f44d3a6dSSatish Balay   v     = a->a;
1198f44d3a6dSSatish Balay   ii    = a->i;
1199f44d3a6dSSatish Balay 
1200f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1201f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1202f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1203f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1204f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1205f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1206f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1207f44d3a6dSSatish Balay       v += 4;
1208f44d3a6dSSatish Balay     }
1209f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1210f44d3a6dSSatish Balay     z += 2; y += 2;
1211f44d3a6dSSatish Balay   }
1212c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1213c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1214c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1215f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
12163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1217f44d3a6dSSatish Balay }
1218f44d3a6dSSatish Balay 
12195615d1e5SSatish Balay #undef __FUNC__
12205615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1221f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1222f44d3a6dSSatish Balay {
1223f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1224f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1225c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1226f44d3a6dSSatish Balay 
12273a40ed3dSBarry Smith   PetscFunctionBegin;
1228c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1229c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1230c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1231f44d3a6dSSatish Balay 
1232f44d3a6dSSatish Balay   idx   = a->j;
1233f44d3a6dSSatish Balay   v     = a->a;
1234f44d3a6dSSatish Balay   ii    = a->i;
1235f44d3a6dSSatish Balay 
1236f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1237f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1238f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1239f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1240f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1241f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1242f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1243f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1244f44d3a6dSSatish Balay       v += 9;
1245f44d3a6dSSatish Balay     }
1246f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1247f44d3a6dSSatish Balay     z += 3; y += 3;
1248f44d3a6dSSatish Balay   }
1249c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1250c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1251c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1252f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
12533a40ed3dSBarry Smith   PetscFunctionReturn(0);
1254f44d3a6dSSatish Balay }
1255f44d3a6dSSatish Balay 
12565615d1e5SSatish Balay #undef __FUNC__
12575615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1258f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1259f44d3a6dSSatish Balay {
1260f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
12613a40ed3dSBarry Smith   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
1262f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1263c16cb8f2SBarry Smith   int             j,n;
1264f44d3a6dSSatish Balay 
12653a40ed3dSBarry Smith   PetscFunctionBegin;
1266c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1267c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1268c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1269f44d3a6dSSatish Balay 
1270f44d3a6dSSatish Balay   idx   = a->j;
1271f44d3a6dSSatish Balay   v     = a->a;
1272f44d3a6dSSatish Balay   ii    = a->i;
1273f44d3a6dSSatish Balay 
1274f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1275f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1276f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1277f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1278f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1279f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1280f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1281f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1282f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1283f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1284f44d3a6dSSatish Balay       v += 16;
1285f44d3a6dSSatish Balay     }
1286f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1287f44d3a6dSSatish Balay     z += 4; y += 4;
1288f44d3a6dSSatish Balay   }
1289c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1290c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1291c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1292f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
12933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1294f44d3a6dSSatish Balay }
1295f44d3a6dSSatish Balay 
12965615d1e5SSatish Balay #undef __FUNC__
12975615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1298f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1299f44d3a6dSSatish Balay {
1300f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13013a40ed3dSBarry Smith   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1302c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1303f44d3a6dSSatish Balay 
13043a40ed3dSBarry Smith   PetscFunctionBegin;
1305c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1306c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1307c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1308f44d3a6dSSatish Balay 
1309f44d3a6dSSatish Balay   idx   = a->j;
1310f44d3a6dSSatish Balay   v     = a->a;
1311f44d3a6dSSatish Balay   ii    = a->i;
1312f44d3a6dSSatish Balay 
1313f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1314f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1315f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1316f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1317f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1318f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1319f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1320f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1321f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1322f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1323f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1324f44d3a6dSSatish Balay       v += 25;
1325f44d3a6dSSatish Balay     }
1326f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1327f44d3a6dSSatish Balay     z += 5; y += 5;
1328f44d3a6dSSatish Balay   }
1329c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1330c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1331c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1332f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
13333a40ed3dSBarry Smith   PetscFunctionReturn(0);
1334f44d3a6dSSatish Balay }
1335f44d3a6dSSatish Balay 
13365615d1e5SSatish Balay #undef __FUNC__
133748e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
133848e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
133948e9ddb2SSatish Balay {
134048e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
134148e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
134248e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
134348e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
134448e9ddb2SSatish Balay 
13453a40ed3dSBarry Smith   PetscFunctionBegin;
134648e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
134748e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
134848e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
134948e9ddb2SSatish Balay 
135048e9ddb2SSatish Balay   idx   = a->j;
135148e9ddb2SSatish Balay   v     = a->a;
135248e9ddb2SSatish Balay   ii    = a->i;
135348e9ddb2SSatish Balay 
135448e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
135548e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
135648e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
135748e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
135848e9ddb2SSatish Balay       xb = x + 7*(*idx++);
135948e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
136048e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
136148e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
136248e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
136348e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
136448e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
136548e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
136648e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
136748e9ddb2SSatish Balay       v += 49;
136848e9ddb2SSatish Balay     }
136948e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
137048e9ddb2SSatish Balay     z += 7; y += 7;
137148e9ddb2SSatish Balay   }
137248e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
137348e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
137448e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
137548e9ddb2SSatish Balay   PLogFlops(98*a->nz);
13763a40ed3dSBarry Smith   PetscFunctionReturn(0);
137748e9ddb2SSatish Balay }
137848e9ddb2SSatish Balay 
137948e9ddb2SSatish Balay 
138048e9ddb2SSatish Balay #undef __FUNC__
13815615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1382f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1383f44d3a6dSSatish Balay {
1384f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1385f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1386f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1387f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1388f44d3a6dSSatish Balay 
13893a40ed3dSBarry Smith   PetscFunctionBegin;
1390f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1391f44d3a6dSSatish Balay 
1392c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1393c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1394f44d3a6dSSatish Balay 
1395f44d3a6dSSatish Balay   idx   = a->j;
1396f44d3a6dSSatish Balay   v     = a->a;
1397f44d3a6dSSatish Balay   ii    = a->i;
1398f44d3a6dSSatish Balay 
1399f44d3a6dSSatish Balay 
1400f44d3a6dSSatish Balay   if (!a->mult_work) {
1401f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1402f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1403f44d3a6dSSatish Balay   }
1404f44d3a6dSSatish Balay   work = a->mult_work;
1405f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1406f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1407f44d3a6dSSatish Balay     ncols = n*bs;
1408f44d3a6dSSatish Balay     workt = work;
1409f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1410f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1411f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1412f44d3a6dSSatish Balay       workt += bs;
1413f44d3a6dSSatish Balay     }
1414f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1415f44d3a6dSSatish Balay     v += n*bs2;
1416f44d3a6dSSatish Balay     z += bs;
1417f44d3a6dSSatish Balay   }
1418c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1419c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1420f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
14213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1422f44d3a6dSSatish Balay }
1423f44d3a6dSSatish Balay 
14245615d1e5SSatish Balay #undef __FUNC__
14255615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
14268f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1427bb42667fSSatish Balay {
1428bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
14291a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1430bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1431bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
14329df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1433119a934aSSatish Balay 
1434119a934aSSatish Balay 
14353a40ed3dSBarry Smith   PetscFunctionBegin;
143690f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
143790f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1438bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1439bb42667fSSatish Balay 
1440119a934aSSatish Balay   idx   = a->j;
1441119a934aSSatish Balay   v     = a->a;
1442119a934aSSatish Balay   ii    = a->i;
1443119a934aSSatish Balay 
1444119a934aSSatish Balay   switch (bs) {
1445119a934aSSatish Balay   case 1:
1446119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1447119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1448119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1449119a934aSSatish Balay       ib = idx + ai[i];
1450119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1451bb1453f0SSatish Balay         rval    = ib[j];
1452bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1453119a934aSSatish Balay       }
1454119a934aSSatish Balay     }
1455119a934aSSatish Balay     break;
1456119a934aSSatish Balay   case 2:
1457119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1458119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1459119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1460119a934aSSatish Balay       ib = idx + ai[i];
1461119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1462119a934aSSatish Balay         rval      = ib[j]*2;
1463bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1464bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1465119a934aSSatish Balay         v += 4;
1466119a934aSSatish Balay       }
1467119a934aSSatish Balay     }
1468119a934aSSatish Balay     break;
1469119a934aSSatish Balay   case 3:
1470119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1471119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1472119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1473119a934aSSatish Balay       ib = idx + ai[i];
1474119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1475119a934aSSatish Balay         rval      = ib[j]*3;
1476bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1477bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1478bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1479119a934aSSatish Balay         v += 9;
1480119a934aSSatish Balay       }
1481119a934aSSatish Balay     }
1482119a934aSSatish Balay     break;
1483119a934aSSatish Balay   case 4:
1484119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1485119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1486119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1487119a934aSSatish Balay       ib = idx + ai[i];
1488119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1489119a934aSSatish Balay         rval      = ib[j]*4;
1490bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1491bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1492bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1493bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1494119a934aSSatish Balay         v += 16;
1495119a934aSSatish Balay       }
1496119a934aSSatish Balay     }
1497119a934aSSatish Balay     break;
1498119a934aSSatish Balay   case 5:
1499119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1500119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1501119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1502119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1503119a934aSSatish Balay       ib = idx + ai[i];
1504119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1505119a934aSSatish Balay         rval      = ib[j]*5;
1506bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1507bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1508bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1509bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1510bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1511119a934aSSatish Balay         v += 25;
1512119a934aSSatish Balay       }
1513119a934aSSatish Balay     }
1514119a934aSSatish Balay     break;
1515119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1516119a934aSSatish Balay     default: {
1517119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1518119a934aSSatish Balay       if (!a->mult_work) {
15193b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1520bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1521119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1522119a934aSSatish Balay       }
1523119a934aSSatish Balay       work = a->mult_work;
1524119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1525119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1526119a934aSSatish Balay         ncols = n*bs;
1527119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1528119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1529119a934aSSatish Balay         v += n*bs2;
1530119a934aSSatish Balay         x += bs;
1531119a934aSSatish Balay         workt = work;
1532119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1533119a934aSSatish Balay           zb = z + bs*(*idx++);
1534bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1535119a934aSSatish Balay           workt += bs;
1536119a934aSSatish Balay         }
1537119a934aSSatish Balay       }
1538119a934aSSatish Balay     }
1539119a934aSSatish Balay   }
15400419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15410419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1542faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
15433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1544faf6580fSSatish Balay }
15451c351548SSatish Balay 
15465615d1e5SSatish Balay #undef __FUNC__
15475615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15488f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1549faf6580fSSatish Balay {
1550faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1551faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1552faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1553faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1554faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1555faf6580fSSatish Balay 
15563a40ed3dSBarry Smith   PetscFunctionBegin;
155790f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
155890f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1559faf6580fSSatish Balay 
1560648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1561648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1562648c1d95SSatish Balay 
1563faf6580fSSatish Balay   idx   = a->j;
1564faf6580fSSatish Balay   v     = a->a;
1565faf6580fSSatish Balay   ii    = a->i;
1566faf6580fSSatish Balay 
1567faf6580fSSatish Balay   switch (bs) {
1568faf6580fSSatish Balay   case 1:
1569faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1570faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1571faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1572faf6580fSSatish Balay       ib = idx + ai[i];
1573faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1574faf6580fSSatish Balay         rval    = ib[j];
1575faf6580fSSatish Balay         z[rval] += *v++ * x1;
1576faf6580fSSatish Balay       }
1577faf6580fSSatish Balay     }
1578faf6580fSSatish Balay     break;
1579faf6580fSSatish Balay   case 2:
1580faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1581faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1582faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1583faf6580fSSatish Balay       ib = idx + ai[i];
1584faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1585faf6580fSSatish Balay         rval      = ib[j]*2;
1586faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1587faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1588faf6580fSSatish Balay         v += 4;
1589faf6580fSSatish Balay       }
1590faf6580fSSatish Balay     }
1591faf6580fSSatish Balay     break;
1592faf6580fSSatish Balay   case 3:
1593faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1594faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1595faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1596faf6580fSSatish Balay       ib = idx + ai[i];
1597faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1598faf6580fSSatish Balay         rval      = ib[j]*3;
1599faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1600faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1601faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1602faf6580fSSatish Balay         v += 9;
1603faf6580fSSatish Balay       }
1604faf6580fSSatish Balay     }
1605faf6580fSSatish Balay     break;
1606faf6580fSSatish Balay   case 4:
1607faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1608faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1609faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1610faf6580fSSatish Balay       ib = idx + ai[i];
1611faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1612faf6580fSSatish Balay         rval      = ib[j]*4;
1613faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1614faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1615faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1616faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1617faf6580fSSatish Balay         v += 16;
1618faf6580fSSatish Balay       }
1619faf6580fSSatish Balay     }
1620faf6580fSSatish Balay     break;
1621faf6580fSSatish Balay   case 5:
1622faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1623faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1624faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1625faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1626faf6580fSSatish Balay       ib = idx + ai[i];
1627faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1628faf6580fSSatish Balay         rval      = ib[j]*5;
1629faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1630faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1631faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1632faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1633faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1634faf6580fSSatish Balay         v += 25;
1635faf6580fSSatish Balay       }
1636faf6580fSSatish Balay     }
1637faf6580fSSatish Balay     break;
1638faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1639faf6580fSSatish Balay     default: {
1640faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1641faf6580fSSatish Balay       if (!a->mult_work) {
1642faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1643faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1644faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1645faf6580fSSatish Balay       }
1646faf6580fSSatish Balay       work = a->mult_work;
1647faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1648faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1649faf6580fSSatish Balay         ncols = n*bs;
1650faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1651faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1652faf6580fSSatish Balay         v += n*bs2;
1653faf6580fSSatish Balay         x += bs;
1654faf6580fSSatish Balay         workt = work;
1655faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1656faf6580fSSatish Balay           zb = z + bs*(*idx++);
1657faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1658faf6580fSSatish Balay           workt += bs;
1659faf6580fSSatish Balay         }
1660faf6580fSSatish Balay       }
1661faf6580fSSatish Balay     }
1662faf6580fSSatish Balay   }
1663faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1664faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1665faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16663a40ed3dSBarry Smith   PetscFunctionReturn(0);
16672593348eSBarry Smith }
16682593348eSBarry Smith 
16695615d1e5SSatish Balay #undef __FUNC__
1670d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ"
16718f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16722593348eSBarry Smith {
1673b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16744e220ebcSLois Curfman McInnes 
16753a40ed3dSBarry Smith   PetscFunctionBegin;
16764e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16774e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16784e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16794e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16804e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16814e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16824e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16834e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16844e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16854e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16864e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16874e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16884e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16894e220ebcSLois Curfman McInnes   if (A->factor) {
16904e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16914e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16924e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16934e220ebcSLois Curfman McInnes   } else {
16944e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16954e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16964e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16974e220ebcSLois Curfman McInnes   }
16983a40ed3dSBarry Smith   PetscFunctionReturn(0);
16992593348eSBarry Smith }
17002593348eSBarry Smith 
17015615d1e5SSatish Balay #undef __FUNC__
17025615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
17038f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
170491d316f6SSatish Balay {
170591d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
170691d316f6SSatish Balay 
17073a40ed3dSBarry Smith   PetscFunctionBegin;
1708a8c6a408SBarry Smith   if (B->type !=MATSEQBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
170991d316f6SSatish Balay 
171091d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
17113a40ed3dSBarry Smith   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
17123a40ed3dSBarry Smith     *flg = PETSC_FALSE; PetscFunctionReturn(0);
171391d316f6SSatish Balay   }
171491d316f6SSatish Balay 
171591d316f6SSatish Balay   /* if the a->i are the same */
171691d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
17173a40ed3dSBarry Smith     *flg = PETSC_FALSE; PetscFunctionReturn(0);
171891d316f6SSatish Balay   }
171991d316f6SSatish Balay 
172091d316f6SSatish Balay   /* if a->j are the same */
172191d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
17223a40ed3dSBarry Smith     *flg = PETSC_FALSE; PetscFunctionReturn(0);
172391d316f6SSatish Balay   }
172491d316f6SSatish Balay 
172591d316f6SSatish Balay   /* if a->a are the same */
172691d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
17273a40ed3dSBarry Smith     *flg = PETSC_FALSE; PetscFunctionReturn(0);
172891d316f6SSatish Balay   }
172991d316f6SSatish Balay   *flg = PETSC_TRUE;
17303a40ed3dSBarry Smith   PetscFunctionReturn(0);
173191d316f6SSatish Balay 
173291d316f6SSatish Balay }
173391d316f6SSatish Balay 
17345615d1e5SSatish Balay #undef __FUNC__
17355615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17368f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
173791d316f6SSatish Balay {
173891d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17397e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
174017e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
174117e48fc4SSatish Balay 
17423a40ed3dSBarry Smith   PetscFunctionBegin;
1743a8c6a408SBarry Smith   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix");
174417e48fc4SSatish Balay   bs   = a->bs;
174517e48fc4SSatish Balay   aa   = a->a;
174617e48fc4SSatish Balay   ai   = a->i;
174717e48fc4SSatish Balay   aj   = a->j;
174817e48fc4SSatish Balay   ambs = a->mbs;
17499df24120SSatish Balay   bs2  = a->bs2;
175091d316f6SSatish Balay 
175191d316f6SSatish Balay   VecSet(&zero,v);
175290f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1753a8c6a408SBarry Smith   if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");
175417e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
175517e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
175617e48fc4SSatish Balay       if (aj[j] == i) {
175717e48fc4SSatish Balay         row  = i*bs;
17587e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17597e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
176091d316f6SSatish Balay         break;
176191d316f6SSatish Balay       }
176291d316f6SSatish Balay     }
176391d316f6SSatish Balay   }
17643a40ed3dSBarry Smith   PetscFunctionReturn(0);
176591d316f6SSatish Balay }
176691d316f6SSatish Balay 
17675615d1e5SSatish Balay #undef __FUNC__
17685615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17698f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17709867e207SSatish Balay {
17719867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17729867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17737e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17749867e207SSatish Balay 
17753a40ed3dSBarry Smith   PetscFunctionBegin;
17769867e207SSatish Balay   ai  = a->i;
17779867e207SSatish Balay   aj  = a->j;
17789867e207SSatish Balay   aa  = a->a;
17799867e207SSatish Balay   m   = a->m;
17809867e207SSatish Balay   n   = a->n;
17819867e207SSatish Balay   bs  = a->bs;
17829867e207SSatish Balay   mbs = a->mbs;
17839df24120SSatish Balay   bs2 = a->bs2;
17849867e207SSatish Balay   if (ll) {
178590f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1786a8c6a408SBarry Smith     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length");
17879867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17889867e207SSatish Balay       M  = ai[i+1] - ai[i];
17899867e207SSatish Balay       li = l + i*bs;
17907e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17919867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17927e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17939867e207SSatish Balay           (*v++) *= li[k%bs];
17949867e207SSatish Balay         }
17959867e207SSatish Balay       }
17969867e207SSatish Balay     }
17979867e207SSatish Balay   }
17989867e207SSatish Balay 
17999867e207SSatish Balay   if (rr) {
180090f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1801a8c6a408SBarry Smith     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length");
18029867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
18039867e207SSatish Balay       M  = ai[i+1] - ai[i];
18047e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
18059867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
18069867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
18079867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
18089867e207SSatish Balay           x = ri[k];
18099867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
18109867e207SSatish Balay         }
18119867e207SSatish Balay       }
18129867e207SSatish Balay     }
18139867e207SSatish Balay   }
18143a40ed3dSBarry Smith   PetscFunctionReturn(0);
18159867e207SSatish Balay }
18169867e207SSatish Balay 
18179867e207SSatish Balay 
1818b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1819b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
18202a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1821736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1822736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
18231a6a6d98SBarry Smith 
18241a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
18251a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
18261a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
18271a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
18281a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
18291a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
183048e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
18311a6a6d98SBarry Smith 
18327fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
18337fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
18347fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
18357fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
18367fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
18377fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18382593348eSBarry Smith 
18395615d1e5SSatish Balay #undef __FUNC__
18405615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18418f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18422593348eSBarry Smith {
1843b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18442593348eSBarry Smith   Scalar      *v = a->a;
18452593348eSBarry Smith   double      sum = 0.0;
18469df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18472593348eSBarry Smith 
18483a40ed3dSBarry Smith   PetscFunctionBegin;
18492593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18500419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18513a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
18522593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18532593348eSBarry Smith #else
18542593348eSBarry Smith       sum += (*v)*(*v); v++;
18552593348eSBarry Smith #endif
18562593348eSBarry Smith     }
18572593348eSBarry Smith     *norm = sqrt(sum);
18582593348eSBarry Smith   }
18592593348eSBarry Smith   else {
1860e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18612593348eSBarry Smith   }
18623a40ed3dSBarry Smith   PetscFunctionReturn(0);
18632593348eSBarry Smith }
18642593348eSBarry Smith 
18653eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
18662593348eSBarry Smith /*
18672593348eSBarry Smith      note: This can only work for identity for row and col. It would
18682593348eSBarry Smith    be good to check this and otherwise generate an error.
18692593348eSBarry Smith */
18705615d1e5SSatish Balay #undef __FUNC__
18715615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18728f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18732593348eSBarry Smith {
1874b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18752593348eSBarry Smith   Mat         outA;
1876de6a44a3SBarry Smith   int         ierr;
18772593348eSBarry Smith 
18783a40ed3dSBarry Smith   PetscFunctionBegin;
1879a8c6a408SBarry Smith   if (fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
18802593348eSBarry Smith 
18812593348eSBarry Smith   outA          = inA;
18822593348eSBarry Smith   inA->factor   = FACTOR_LU;
18832593348eSBarry Smith   a->row        = row;
18842593348eSBarry Smith   a->col        = col;
18852593348eSBarry Smith 
1886eed86810SBarry Smith   if (!a->solve_work) {
1887de6a44a3SBarry Smith     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1888eed86810SBarry Smith     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1889eed86810SBarry Smith   }
18902593348eSBarry Smith 
18912593348eSBarry Smith   if (!a->diag) {
1892b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18932593348eSBarry Smith   }
18947fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18953eee16b0SBarry Smith 
18963eee16b0SBarry Smith   /*
18973eee16b0SBarry Smith       Blocksize 4 has a special faster solver for ILU(0) factorization
18983eee16b0SBarry Smith     with natural ordering
18993eee16b0SBarry Smith   */
19003eee16b0SBarry Smith   if (a->bs == 4) {
19013eee16b0SBarry Smith     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
19023eee16b0SBarry Smith   }
19033eee16b0SBarry Smith 
19043a40ed3dSBarry Smith   PetscFunctionReturn(0);
19052593348eSBarry Smith }
19062593348eSBarry Smith 
19075615d1e5SSatish Balay #undef __FUNC__
19085615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
19098f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
19102593348eSBarry Smith {
1911b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
19129df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
19133a40ed3dSBarry Smith 
19143a40ed3dSBarry Smith   PetscFunctionBegin;
1915b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1916b6490206SBarry Smith   PLogFlops(totalnz);
19173a40ed3dSBarry Smith   PetscFunctionReturn(0);
19182593348eSBarry Smith }
19192593348eSBarry Smith 
19205615d1e5SSatish Balay #undef __FUNC__
19215615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
19228f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
19237e67e3f9SSatish Balay {
19247e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
19257e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1926a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
19279df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
19287e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
19297e67e3f9SSatish Balay 
19303a40ed3dSBarry Smith   PetscFunctionBegin;
19317e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
19327e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1933a8c6a408SBarry Smith     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
1934a8c6a408SBarry Smith     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
1935a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
19367e67e3f9SSatish Balay     nrow = ailen[brow];
19377e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1938a8c6a408SBarry Smith       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
1939a8c6a408SBarry Smith       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
1940a7c10996SSatish Balay       col  = in[l] ;
19417e67e3f9SSatish Balay       bcol = col/bs;
19427e67e3f9SSatish Balay       cidx = col%bs;
19437e67e3f9SSatish Balay       ridx = row%bs;
19447e67e3f9SSatish Balay       high = nrow;
19457e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
19467e67e3f9SSatish Balay       while (high-low > 5) {
19477e67e3f9SSatish Balay         t = (low+high)/2;
19487e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
19497e67e3f9SSatish Balay         else             low  = t;
19507e67e3f9SSatish Balay       }
19517e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
19527e67e3f9SSatish Balay         if (rp[i] > bcol) break;
19537e67e3f9SSatish Balay         if (rp[i] == bcol) {
19547e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
19557e67e3f9SSatish Balay           goto finished;
19567e67e3f9SSatish Balay         }
19577e67e3f9SSatish Balay       }
19587e67e3f9SSatish Balay       *v++ = zero;
19597e67e3f9SSatish Balay       finished:;
19607e67e3f9SSatish Balay     }
19617e67e3f9SSatish Balay   }
19623a40ed3dSBarry Smith   PetscFunctionReturn(0);
19637e67e3f9SSatish Balay }
19647e67e3f9SSatish Balay 
19655615d1e5SSatish Balay #undef __FUNC__
1966d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
19678f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19685a838052SSatish Balay {
19695a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19703a40ed3dSBarry Smith 
19713a40ed3dSBarry Smith   PetscFunctionBegin;
19725a838052SSatish Balay   *bs = baij->bs;
19733a40ed3dSBarry Smith   PetscFunctionReturn(0);
19745a838052SSatish Balay }
19755a838052SSatish Balay 
1976d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19775615d1e5SSatish Balay #undef __FUNC__
19785615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1979d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1980d9b7c43dSSatish Balay {
1981d9b7c43dSSatish Balay   int i,row;
19823a40ed3dSBarry Smith 
19833a40ed3dSBarry Smith   PetscFunctionBegin;
1984d9b7c43dSSatish Balay   row = idx[0];
19853a40ed3dSBarry Smith   if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1986d9b7c43dSSatish Balay 
1987d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
19883a40ed3dSBarry Smith     if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); }
1989d9b7c43dSSatish Balay   }
1990d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
19913a40ed3dSBarry Smith   PetscFunctionReturn(0);
1992d9b7c43dSSatish Balay }
1993d9b7c43dSSatish Balay 
19945615d1e5SSatish Balay #undef __FUNC__
19955615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19968f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1997d9b7c43dSSatish Balay {
1998d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1999d9b7c43dSSatish Balay   IS          is_local;
2000d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
2001d9b7c43dSSatish Balay   PetscTruth  flg;
2002d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
2003d9b7c43dSSatish Balay 
20043a40ed3dSBarry Smith   PetscFunctionBegin;
2005d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2006d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
2007d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
2008537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
2009d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
2010d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
2011d9b7c43dSSatish Balay 
2012d9b7c43dSSatish Balay   i = 0;
2013d9b7c43dSSatish Balay   while (i < is_n) {
2014a8c6a408SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"row out of range");
2015d9b7c43dSSatish Balay     flg = PETSC_FALSE;
2016d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
2017d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
2018d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
2019d9b7c43dSSatish Balay       i += bs;
2020d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
2021d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
2022d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
2023d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
2024d9b7c43dSSatish Balay         aa[0] = zero;
2025d9b7c43dSSatish Balay         aa+=bs;
2026d9b7c43dSSatish Balay       }
2027d9b7c43dSSatish Balay       i++;
2028d9b7c43dSSatish Balay     }
2029d9b7c43dSSatish Balay   }
2030d9b7c43dSSatish Balay   if (diag) {
2031d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
2032d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
2033d9b7c43dSSatish Balay     }
2034d9b7c43dSSatish Balay   }
2035d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
2036d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2037d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
20389a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2039d9b7c43dSSatish Balay 
20403a40ed3dSBarry Smith   PetscFunctionReturn(0);
2041d9b7c43dSSatish Balay }
20421c351548SSatish Balay 
20435615d1e5SSatish Balay #undef __FUNC__
2044d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2045ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
2046ba4ca20aSSatish Balay {
2047ba4ca20aSSatish Balay   static int called = 0;
2048ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
2049ba4ca20aSSatish Balay 
20503a40ed3dSBarry Smith   PetscFunctionBegin;
20513a40ed3dSBarry Smith   if (called) {PetscFunctionReturn(0);} else called = 1;
2052ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2053ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
20543a40ed3dSBarry Smith   PetscFunctionReturn(0);
2055ba4ca20aSSatish Balay }
2056d9b7c43dSSatish Balay 
20572593348eSBarry Smith /* -------------------------------------------------------------------*/
2058cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
20599867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2060f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2061faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20621a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2063b6490206SBarry Smith        0,0,
2064de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2065b6490206SBarry Smith        0,
2066f2501298SSatish Balay        MatTranspose_SeqBAIJ,
206717e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20689867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2069584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2070b6490206SBarry Smith        0,
2071d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20727fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2073b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2074de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2075d402145bSBarry Smith        0,0,
2076b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2077b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2078af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20797e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2080ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20813b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20823b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
208392c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
208492c4ed94SBarry Smith        0,0,0,0,0,0,
208592c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20862593348eSBarry Smith 
20875615d1e5SSatish Balay #undef __FUNC__
20885615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20892593348eSBarry Smith /*@C
209044cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
209144cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
209244cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20932bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20942bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20952593348eSBarry Smith 
20962593348eSBarry Smith    Input Parameters:
2097029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2098b6490206SBarry Smith .  bs - size of block
20992593348eSBarry Smith .  m - number of rows
21002593348eSBarry Smith .  n - number of columns
2101b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
21022bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
21032bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
21042593348eSBarry Smith 
21052593348eSBarry Smith    Output Parameter:
21062593348eSBarry Smith .  A - the matrix
21072593348eSBarry Smith 
21080513a670SBarry Smith    Options Database Keys:
21090513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
21100effe34fSLois Curfman McInnes $                     block calculations (much slower)
21110513a670SBarry Smith $    -mat_block_size - size of the blocks to use
21120513a670SBarry Smith 
21132593348eSBarry Smith    Notes:
211444cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
21152593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
211644cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
21172593348eSBarry Smith 
21182593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
21192593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
21202593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
21216da5968aSLois Curfman McInnes    matrices.
21222593348eSBarry Smith 
212344cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
21242593348eSBarry Smith @*/
2125b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
21262593348eSBarry Smith {
21272593348eSBarry Smith   Mat         B;
2128b6490206SBarry Smith   Mat_SeqBAIJ *b;
21293b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
21303b2fbd54SBarry Smith 
21313a40ed3dSBarry Smith   PetscFunctionBegin;
21323b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2133a8c6a408SBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
2134b6490206SBarry Smith 
21350513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
21360513a670SBarry Smith 
21373a40ed3dSBarry Smith   if (mbs*bs!=m || nbs*bs!=n) {
2138a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
21393a40ed3dSBarry Smith   }
21402593348eSBarry Smith 
21412593348eSBarry Smith   *A = 0;
2142d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
21432593348eSBarry Smith   PLogObjectCreate(B);
2144b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
214544cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
21462593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
21471a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
21481a6a6d98SBarry Smith   if (!flg) {
21497fc0212eSBarry Smith     switch (bs) {
21507fc0212eSBarry Smith     case 1:
21517fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21521a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
215339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2154f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
21557fc0212eSBarry Smith       break;
21564eeb42bcSBarry Smith     case 2:
21574eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
21581a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
215939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2160f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
21616d84be18SBarry Smith       break;
2162f327dd97SBarry Smith     case 3:
2163f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21641a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
216539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2166f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21674eeb42bcSBarry Smith       break;
21686d84be18SBarry Smith     case 4:
21696d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21701a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
217139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2172f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21736d84be18SBarry Smith       break;
21746d84be18SBarry Smith     case 5:
21756d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21761a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
217739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2178f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21796d84be18SBarry Smith       break;
218048e9ddb2SSatish Balay     case 7:
218148e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
218248e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
218348e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
218448e9ddb2SSatish Balay       break;
21857fc0212eSBarry Smith     }
21861a6a6d98SBarry Smith   }
2187b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2188b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21892593348eSBarry Smith   B->factor           = 0;
21902593348eSBarry Smith   B->lupivotthreshold = 1.0;
219190f02eecSBarry Smith   B->mapping          = 0;
21922593348eSBarry Smith   b->row              = 0;
21932593348eSBarry Smith   b->col              = 0;
21942593348eSBarry Smith   b->reallocs         = 0;
21952593348eSBarry Smith 
219644cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
219744cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2198b6490206SBarry Smith   b->mbs     = mbs;
2199f2501298SSatish Balay   b->nbs     = nbs;
2200b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
22012593348eSBarry Smith   if (nnz == PETSC_NULL) {
2202119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
22032593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2204b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2205b6490206SBarry Smith     nz = nz*mbs;
22063a40ed3dSBarry Smith   } else {
22072593348eSBarry Smith     nz = 0;
2208b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
22092593348eSBarry Smith   }
22102593348eSBarry Smith 
22112593348eSBarry Smith   /* allocate the matrix space */
22127e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
22132593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
22147e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
22157e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
22162593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
22172593348eSBarry Smith   b->i  = b->j + nz;
22182593348eSBarry Smith   b->singlemalloc = 1;
22192593348eSBarry Smith 
2220de6a44a3SBarry Smith   b->i[0] = 0;
2221b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
22222593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
22232593348eSBarry Smith   }
22242593348eSBarry Smith 
2225b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2226b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2227f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2228b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
22292593348eSBarry Smith 
2230b6490206SBarry Smith   b->bs               = bs;
22319df24120SSatish Balay   b->bs2              = bs2;
2232b6490206SBarry Smith   b->mbs              = mbs;
22332593348eSBarry Smith   b->nz               = 0;
223418e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
22352593348eSBarry Smith   b->sorted           = 0;
22362593348eSBarry Smith   b->roworiented      = 1;
22372593348eSBarry Smith   b->nonew            = 0;
22382593348eSBarry Smith   b->diag             = 0;
22392593348eSBarry Smith   b->solve_work       = 0;
2240de6a44a3SBarry Smith   b->mult_work        = 0;
22412593348eSBarry Smith   b->spptr            = 0;
22424e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
22434e220ebcSLois Curfman McInnes 
22442593348eSBarry Smith   *A = B;
22452593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
22462593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
22473a40ed3dSBarry Smith   PetscFunctionReturn(0);
22482593348eSBarry Smith }
22492593348eSBarry Smith 
22505615d1e5SSatish Balay #undef __FUNC__
22515615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2252b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
22532593348eSBarry Smith {
22542593348eSBarry Smith   Mat         C;
2255b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
22569df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2257de6a44a3SBarry Smith 
22583a40ed3dSBarry Smith   PetscFunctionBegin;
2259a8c6a408SBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
22602593348eSBarry Smith 
22612593348eSBarry Smith   *B = 0;
2262d4bb536fSBarry Smith   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
22632593348eSBarry Smith   PLogObjectCreate(C);
2264b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22652593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2266b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2267b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22682593348eSBarry Smith   C->factor     = A->factor;
22692593348eSBarry Smith   c->row        = 0;
22702593348eSBarry Smith   c->col        = 0;
22712593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22722593348eSBarry Smith 
227344cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
227444cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
227544cd7ae7SLois Curfman McInnes   C->M          = a->m;
227644cd7ae7SLois Curfman McInnes   C->N          = a->n;
227744cd7ae7SLois Curfman McInnes 
2278b6490206SBarry Smith   c->bs         = a->bs;
22799df24120SSatish Balay   c->bs2        = a->bs2;
2280b6490206SBarry Smith   c->mbs        = a->mbs;
2281b6490206SBarry Smith   c->nbs        = a->nbs;
22822593348eSBarry Smith 
2283b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2284b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2285b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22862593348eSBarry Smith     c->imax[i] = a->imax[i];
22872593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22882593348eSBarry Smith   }
22892593348eSBarry Smith 
22902593348eSBarry Smith   /* allocate the matrix space */
22912593348eSBarry Smith   c->singlemalloc = 1;
22927e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22932593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22947e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2295de6a44a3SBarry Smith   c->i  = c->j + nz;
2296b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2297b6490206SBarry Smith   if (mbs > 0) {
2298de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22992593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
23007e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
23012593348eSBarry Smith     }
23022593348eSBarry Smith   }
23032593348eSBarry Smith 
2304f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
23052593348eSBarry Smith   c->sorted      = a->sorted;
23062593348eSBarry Smith   c->roworiented = a->roworiented;
23072593348eSBarry Smith   c->nonew       = a->nonew;
23082593348eSBarry Smith 
23092593348eSBarry Smith   if (a->diag) {
2310b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2311b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2312b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
23132593348eSBarry Smith       c->diag[i] = a->diag[i];
23142593348eSBarry Smith     }
23152593348eSBarry Smith   }
23162593348eSBarry Smith   else c->diag          = 0;
23172593348eSBarry Smith   c->nz                 = a->nz;
23182593348eSBarry Smith   c->maxnz              = a->maxnz;
23192593348eSBarry Smith   c->solve_work         = 0;
23202593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
23217fc0212eSBarry Smith   c->mult_work          = 0;
23222593348eSBarry Smith   *B = C;
23233a40ed3dSBarry Smith   PetscFunctionReturn(0);
23242593348eSBarry Smith }
23252593348eSBarry Smith 
23265615d1e5SSatish Balay #undef __FUNC__
23275615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
232819bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
23292593348eSBarry Smith {
2330b6490206SBarry Smith   Mat_SeqBAIJ  *a;
23312593348eSBarry Smith   Mat          B;
2332de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2333b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
233435aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2335a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2336b6490206SBarry Smith   Scalar       *aa;
233719bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
23382593348eSBarry Smith 
23393a40ed3dSBarry Smith   PetscFunctionBegin;
23401a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2341de6a44a3SBarry Smith   bs2  = bs*bs;
2342b6490206SBarry Smith 
23432593348eSBarry Smith   MPI_Comm_size(comm,&size);
2344*cc0fae11SBarry Smith   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
234590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
23460752156aSBarry Smith   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
2347a8c6a408SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
23482593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
23492593348eSBarry Smith 
2350d64ed03dSBarry Smith   if (header[3] < 0) {
2351a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
2352d64ed03dSBarry Smith   }
2353d64ed03dSBarry Smith 
2354a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
235535aab85fSBarry Smith 
235635aab85fSBarry Smith   /*
235735aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
235835aab85fSBarry Smith     divisible by the blocksize
235935aab85fSBarry Smith   */
2360b6490206SBarry Smith   mbs        = M/bs;
236135aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
236235aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
236335aab85fSBarry Smith   else                  mbs++;
236435aab85fSBarry Smith   if (extra_rows) {
2365537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
236635aab85fSBarry Smith   }
2367b6490206SBarry Smith 
23682593348eSBarry Smith   /* read in row lengths */
236935aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
23700752156aSBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
237135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23722593348eSBarry Smith 
2373b6490206SBarry Smith   /* read in column indices */
237435aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
23750752156aSBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
237635aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2377b6490206SBarry Smith 
2378b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2379b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2380b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
238135aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
238235aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
238335aab85fSBarry Smith   masked = mask + mbs;
2384b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2385b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
238635aab85fSBarry Smith     nmask = 0;
2387b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2388b6490206SBarry Smith       kmax = rowlengths[rowcount];
2389b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
239035aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
239135aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2392b6490206SBarry Smith       }
2393b6490206SBarry Smith       rowcount++;
2394b6490206SBarry Smith     }
239535aab85fSBarry Smith     browlengths[i] += nmask;
239635aab85fSBarry Smith     /* zero out the mask elements we set */
239735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2398b6490206SBarry Smith   }
2399b6490206SBarry Smith 
24002593348eSBarry Smith   /* create our matrix */
24013eee16b0SBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
24022593348eSBarry Smith   B = *A;
2403b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
24042593348eSBarry Smith 
24052593348eSBarry Smith   /* set matrix "i" values */
2406de6a44a3SBarry Smith   a->i[0] = 0;
2407b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2408b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2409b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
24102593348eSBarry Smith   }
2411b6490206SBarry Smith   a->nz         = 0;
2412b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
24132593348eSBarry Smith 
2414b6490206SBarry Smith   /* read in nonzero values */
241535aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
24160752156aSBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
241735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2418b6490206SBarry Smith 
2419b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2420b6490206SBarry Smith   nzcount = 0; jcount = 0;
2421b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2422b6490206SBarry Smith     nzcountb = nzcount;
242335aab85fSBarry Smith     nmask    = 0;
2424b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2425b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2426b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
242735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
242835aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2429b6490206SBarry Smith       }
2430b6490206SBarry Smith       rowcount++;
2431b6490206SBarry Smith     }
2432de6a44a3SBarry Smith     /* sort the masked values */
243377c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2434de6a44a3SBarry Smith 
2435b6490206SBarry Smith     /* set "j" values into matrix */
2436b6490206SBarry Smith     maskcount = 1;
243735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
243835aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2439de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2440b6490206SBarry Smith     }
2441b6490206SBarry Smith     /* set "a" values into matrix */
2442de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2443b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2444b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2445b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2446de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2447de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2448de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2449de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2450b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2451b6490206SBarry Smith       }
2452b6490206SBarry Smith     }
245335aab85fSBarry Smith     /* zero out the mask elements we set */
245435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2455b6490206SBarry Smith   }
2456a8c6a408SBarry Smith   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
2457b6490206SBarry Smith 
2458b6490206SBarry Smith   PetscFree(rowlengths);
2459b6490206SBarry Smith   PetscFree(browlengths);
2460b6490206SBarry Smith   PetscFree(aa);
2461b6490206SBarry Smith   PetscFree(jj);
2462b6490206SBarry Smith   PetscFree(mask);
2463b6490206SBarry Smith 
2464b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2465de6a44a3SBarry Smith 
24669c01be13SBarry Smith   ierr = MatView_Private(B); CHKERRQ(ierr);
24673a40ed3dSBarry Smith   PetscFunctionReturn(0);
24682593348eSBarry Smith }
24692593348eSBarry Smith 
24702593348eSBarry Smith 
24712593348eSBarry Smith 
2472