xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5615d1e584023db9367fb782d85b1b4ebbb8df18)
12593348eSBarry Smith #ifndef lint
2*5615d1e5SSatish Balay static char vcid[] = "$Id: baij.c,v 1.81 1997/01/01 03:38:19 bsmith Exp balay $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
19*5615d1e5SSatish Balay #undef __FUNC__
20*5615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
22de6a44a3SBarry Smith {
23de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
247fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
25de6a44a3SBarry Smith 
26de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
287fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
29de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30de6a44a3SBarry Smith       if (a->j[j] == i) {
31de6a44a3SBarry Smith         diag[i] = j;
32de6a44a3SBarry Smith         break;
33de6a44a3SBarry Smith       }
34de6a44a3SBarry Smith     }
35de6a44a3SBarry Smith   }
36de6a44a3SBarry Smith   a->diag = diag;
37de6a44a3SBarry Smith   return 0;
38de6a44a3SBarry Smith }
392593348eSBarry Smith 
402593348eSBarry Smith #include "draw.h"
412593348eSBarry Smith #include "pinclude/pviewer.h"
4277c4ece6SBarry Smith #include "sys.h"
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
46*5615d1e5SSatish Balay #undef __FUNC__
47*5615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
71*5615d1e5SSatish Balay #undef __FUNC__
72*5615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
92*5615d1e5SSatish Balay #undef __FUNC__
93*5615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
1197e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
1347e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
150*5615d1e5SSatish Balay #undef __FUNC__
151*5615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
217*5615d1e5SSatish Balay #undef __FUNC__
218*5615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw"
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2453270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2593270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2743270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
3053270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3193270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3343270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
344*5615d1e5SSatish Balay #undef __FUNC__
345*5615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ"
346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
370*5615d1e5SSatish Balay #undef __FUNC__
371*5615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417cd0e1443SSatish Balay           goto noinsert;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420cd0e1443SSatish Balay       if (nonew) goto noinsert;
421cd0e1443SSatish Balay       if (nrow >= rmax) {
422cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
423cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424cd0e1443SSatish Balay         Scalar *new_a;
425cd0e1443SSatish Balay 
426cd0e1443SSatish Balay         /* malloc new storage space */
4277e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4297e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
430cd0e1443SSatish Balay         new_i   = new_j + new_nz;
431cd0e1443SSatish Balay 
432cd0e1443SSatish Balay         /* copy over old data into new slots */
433cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4347e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438cd0e1443SSatish Balay                                                            len*sizeof(int));
439a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443cd0e1443SSatish Balay         /* free up old matrix storage */
444cd0e1443SSatish Balay         PetscFree(a->a);
445cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447cd0e1443SSatish Balay         a->singlemalloc = 1;
448cd0e1443SSatish Balay 
449a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4517e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
453cd0e1443SSatish Balay         a->reallocs++;
454119a934aSSatish Balay         a->nz++;
455cd0e1443SSatish Balay       }
4567e67e3f9SSatish Balay       N = nrow++ - 1;
457cd0e1443SSatish Balay       /* shift up all the later entries in this row */
458cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
459cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4607e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461cd0e1443SSatish Balay       }
4627e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463cd0e1443SSatish Balay       rp[i]                      = bcol;
4647e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
465cd0e1443SSatish Balay       noinsert:;
466cd0e1443SSatish Balay       low = i;
467cd0e1443SSatish Balay     }
468cd0e1443SSatish Balay     ailen[brow] = nrow;
469cd0e1443SSatish Balay   }
470cd0e1443SSatish Balay   return 0;
471cd0e1443SSatish Balay }
472cd0e1443SSatish Balay 
473*5615d1e5SSatish Balay #undef __FUNC__
474*5615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ"
4750b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4760b824a48SSatish Balay {
4770b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4780b824a48SSatish Balay   *m = a->m; *n = a->n;
4790b824a48SSatish Balay   return 0;
4800b824a48SSatish Balay }
4810b824a48SSatish Balay 
482*5615d1e5SSatish Balay #undef __FUNC__
483*5615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
4840b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4850b824a48SSatish Balay {
4860b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4870b824a48SSatish Balay   *m = 0; *n = a->m;
4880b824a48SSatish Balay   return 0;
4890b824a48SSatish Balay }
4900b824a48SSatish Balay 
491*5615d1e5SSatish Balay #undef __FUNC__
492*5615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
4939867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4949867e207SSatish Balay {
4959867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4967e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4979867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4989867e207SSatish Balay 
4999867e207SSatish Balay   bs  = a->bs;
5009867e207SSatish Balay   ai  = a->i;
5019867e207SSatish Balay   aj  = a->j;
5029867e207SSatish Balay   aa  = a->a;
5039df24120SSatish Balay   bs2 = a->bs2;
5049867e207SSatish Balay 
505e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
5069867e207SSatish Balay 
5079867e207SSatish Balay   bn  = row/bs;   /* Block number */
5089867e207SSatish Balay   bp  = row % bs; /* Block Position */
5099867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
5109867e207SSatish Balay   *nz = bs*M;
5119867e207SSatish Balay 
5129867e207SSatish Balay   if (v) {
5139867e207SSatish Balay     *v = 0;
5149867e207SSatish Balay     if (*nz) {
5159867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
5169867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5179867e207SSatish Balay         v_i  = *v + i*bs;
5187e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
5197e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
5209867e207SSatish Balay       }
5219867e207SSatish Balay     }
5229867e207SSatish Balay   }
5239867e207SSatish Balay 
5249867e207SSatish Balay   if (idx) {
5259867e207SSatish Balay     *idx = 0;
5269867e207SSatish Balay     if (*nz) {
5279867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5289867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5299867e207SSatish Balay         idx_i = *idx + i*bs;
5309867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5319867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5329867e207SSatish Balay       }
5339867e207SSatish Balay     }
5349867e207SSatish Balay   }
5359867e207SSatish Balay   return 0;
5369867e207SSatish Balay }
5379867e207SSatish Balay 
538*5615d1e5SSatish Balay #undef __FUNC__
539*5615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ"
5409867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5419867e207SSatish Balay {
5429867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5439867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5449867e207SSatish Balay   return 0;
5459867e207SSatish Balay }
546b6490206SBarry Smith 
547*5615d1e5SSatish Balay #undef __FUNC__
548*5615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
549f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
550f2501298SSatish Balay {
551f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
552f2501298SSatish Balay   Mat         C;
553f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5549df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
555f2501298SSatish Balay   Scalar      *array=a->a;
556f2501298SSatish Balay 
557f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
558e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
559f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
560f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
561a7c10996SSatish Balay 
562a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
563f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
564f2501298SSatish Balay   PetscFree(col);
565f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
566f2501298SSatish Balay   cols = rows + bs;
567f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
568f2501298SSatish Balay     cols[0] = i*bs;
569f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
570f2501298SSatish Balay     len = ai[i+1] - ai[i];
571f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
572f2501298SSatish Balay       rows[0] = (*aj++)*bs;
573f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
574f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
575f2501298SSatish Balay       array += bs2;
576f2501298SSatish Balay     }
577f2501298SSatish Balay   }
5781073c447SSatish Balay   PetscFree(rows);
579f2501298SSatish Balay 
5806d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5816d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
582f2501298SSatish Balay 
583f2501298SSatish Balay   if (B != PETSC_NULL) {
584f2501298SSatish Balay     *B = C;
585f2501298SSatish Balay   } else {
586f2501298SSatish Balay     /* This isn't really an in-place transpose */
587f2501298SSatish Balay     PetscFree(a->a);
588f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
589f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
590f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
591f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
592f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
593f2501298SSatish Balay     PetscFree(a);
594f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
595f2501298SSatish Balay     PetscHeaderDestroy(C);
596f2501298SSatish Balay   }
597f2501298SSatish Balay   return 0;
598f2501298SSatish Balay }
599f2501298SSatish Balay 
600f2501298SSatish Balay 
601*5615d1e5SSatish Balay #undef __FUNC__
602*5615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
603584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
604584200bdSSatish Balay {
605584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
606584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
607a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
6089df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
609584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
610584200bdSSatish Balay 
6116d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
612584200bdSSatish Balay 
613584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
614584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
615584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
616584200bdSSatish Balay     if (fshift) {
617a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
618584200bdSSatish Balay       N = ailen[i];
619584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
620584200bdSSatish Balay         ip[j-fshift] = ip[j];
6217e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
622584200bdSSatish Balay       }
623584200bdSSatish Balay     }
624584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
625584200bdSSatish Balay   }
626584200bdSSatish Balay   if (mbs) {
627584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
628584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
629584200bdSSatish Balay   }
630584200bdSSatish Balay   /* reset ilen and imax for each row */
631584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
632584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
633584200bdSSatish Balay   }
634a7c10996SSatish Balay   a->nz = ai[mbs];
635584200bdSSatish Balay 
636584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
637584200bdSSatish Balay   if (fshift && a->diag) {
638584200bdSSatish Balay     PetscFree(a->diag);
639584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
640584200bdSSatish Balay     a->diag = 0;
641584200bdSSatish Balay   }
6423d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
643219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
6443d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
645584200bdSSatish Balay            a->reallocs);
646e2f3b5e9SSatish Balay   a->reallocs          = 0;
6474e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
6484e220ebcSLois Curfman McInnes 
649584200bdSSatish Balay   return 0;
650584200bdSSatish Balay }
651584200bdSSatish Balay 
652*5615d1e5SSatish Balay #undef __FUNC__
653*5615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
654b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6552593348eSBarry Smith {
656b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6579df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6582593348eSBarry Smith   return 0;
6592593348eSBarry Smith }
6602593348eSBarry Smith 
661*5615d1e5SSatish Balay #undef __FUNC__
662*5615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ"
663b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6642593348eSBarry Smith {
6652593348eSBarry Smith   Mat         A  = (Mat) obj;
666b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
66790f02eecSBarry Smith   int         ierr;
6682593348eSBarry Smith 
6692593348eSBarry Smith #if defined(PETSC_LOG)
6702593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6712593348eSBarry Smith #endif
6722593348eSBarry Smith   PetscFree(a->a);
6732593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6742593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6752593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6762593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6772593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
678de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6792593348eSBarry Smith   PetscFree(a);
68090f02eecSBarry Smith   if (A->mapping) {
68190f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
68290f02eecSBarry Smith   }
683f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
684f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6852593348eSBarry Smith   return 0;
6862593348eSBarry Smith }
6872593348eSBarry Smith 
688*5615d1e5SSatish Balay #undef __FUNC__
689*5615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ"
690b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6912593348eSBarry Smith {
692b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6936d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6946d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6956d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
696219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
6976d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6986d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6996d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
700219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
7016d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
7026d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
70390f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
70490f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
70594a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
7066d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
707e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
7082593348eSBarry Smith   else
709e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
7102593348eSBarry Smith   return 0;
7112593348eSBarry Smith }
7122593348eSBarry Smith 
7132593348eSBarry Smith 
7142593348eSBarry Smith /* -------------------------------------------------------*/
7152593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
7162593348eSBarry Smith /* -------------------------------------------------------*/
717b6490206SBarry Smith #include "pinclude/plapack.h"
718b6490206SBarry Smith 
719*5615d1e5SSatish Balay #undef __FUNC__
720*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
72139b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
7222593348eSBarry Smith {
723b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
72439b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
725c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
7262593348eSBarry Smith 
727c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
728c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
729b6490206SBarry Smith 
730119a934aSSatish Balay   idx   = a->j;
731119a934aSSatish Balay   v     = a->a;
732119a934aSSatish Balay   ii    = a->i;
733119a934aSSatish Balay 
734119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
735119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
736119a934aSSatish Balay     sum  = 0.0;
737119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
7381a6a6d98SBarry Smith     z[i] = sum;
739119a934aSSatish Balay   }
740c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
741c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
74239b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
74339b95ed1SBarry Smith   return 0;
74439b95ed1SBarry Smith }
74539b95ed1SBarry Smith 
746*5615d1e5SSatish Balay #undef __FUNC__
747*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
74839b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
74939b95ed1SBarry Smith {
75039b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
75139b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
75239b95ed1SBarry Smith   register Scalar x1,x2;
75339b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
754c16cb8f2SBarry Smith   int             j,n;
75539b95ed1SBarry Smith 
756c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
757c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
75839b95ed1SBarry Smith 
75939b95ed1SBarry Smith   idx   = a->j;
76039b95ed1SBarry Smith   v     = a->a;
76139b95ed1SBarry Smith   ii    = a->i;
76239b95ed1SBarry Smith 
763119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
764119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
765119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
766119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
767119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
768119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
769119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
770119a934aSSatish Balay       v += 4;
771119a934aSSatish Balay     }
7721a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
773119a934aSSatish Balay     z += 2;
774119a934aSSatish Balay   }
775c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
776c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
77739b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
77839b95ed1SBarry Smith   return 0;
77939b95ed1SBarry Smith }
78039b95ed1SBarry Smith 
781*5615d1e5SSatish Balay #undef __FUNC__
782*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
78339b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
78439b95ed1SBarry Smith {
78539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
78639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
787c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
78839b95ed1SBarry Smith 
789c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
790c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
79139b95ed1SBarry Smith 
79239b95ed1SBarry Smith   idx   = a->j;
79339b95ed1SBarry Smith   v     = a->a;
79439b95ed1SBarry Smith   ii    = a->i;
79539b95ed1SBarry Smith 
796119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
797119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
798119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
799119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
800119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
801119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
802119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
803119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
804119a934aSSatish Balay       v += 9;
805119a934aSSatish Balay     }
8061a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
807119a934aSSatish Balay     z += 3;
808119a934aSSatish Balay   }
809c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
810c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
81139b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
81239b95ed1SBarry Smith   return 0;
81339b95ed1SBarry Smith }
81439b95ed1SBarry Smith 
815*5615d1e5SSatish Balay #undef __FUNC__
816*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
81739b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
81839b95ed1SBarry Smith {
81939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
82039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
82139b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
82239b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
823c16cb8f2SBarry Smith   int             j,n;
82439b95ed1SBarry Smith 
825c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
826c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
82739b95ed1SBarry Smith 
82839b95ed1SBarry Smith   idx   = a->j;
82939b95ed1SBarry Smith   v     = a->a;
83039b95ed1SBarry Smith   ii    = a->i;
83139b95ed1SBarry Smith 
832119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
833119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
834119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
835119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
836119a934aSSatish Balay       xb = x + 4*(*idx++);
837119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
838119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
839119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
840119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
841119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
842119a934aSSatish Balay       v += 16;
843119a934aSSatish Balay     }
8441a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
845119a934aSSatish Balay     z += 4;
846119a934aSSatish Balay   }
847c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
848c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
84939b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
85039b95ed1SBarry Smith   return 0;
85139b95ed1SBarry Smith }
85239b95ed1SBarry Smith 
853*5615d1e5SSatish Balay #undef __FUNC__
854*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
85539b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
85639b95ed1SBarry Smith {
85739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
85839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
85939b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
860c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
86139b95ed1SBarry Smith 
862c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
863c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
86439b95ed1SBarry Smith 
86539b95ed1SBarry Smith   idx   = a->j;
86639b95ed1SBarry Smith   v     = a->a;
86739b95ed1SBarry Smith   ii    = a->i;
86839b95ed1SBarry Smith 
869119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
870119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
871119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
872119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
873119a934aSSatish Balay       xb = x + 5*(*idx++);
874119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
875119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
876119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
877119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
878119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
879119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
880119a934aSSatish Balay       v += 25;
881119a934aSSatish Balay     }
8821a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
883119a934aSSatish Balay     z += 5;
884119a934aSSatish Balay   }
885c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
886c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
88739b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
88839b95ed1SBarry Smith   return 0;
88939b95ed1SBarry Smith }
89039b95ed1SBarry Smith 
891*5615d1e5SSatish Balay #undef __FUNC__
892*5615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
89339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
89439b95ed1SBarry Smith {
89539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
89639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
897c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
898c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
899c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
90039b95ed1SBarry Smith 
901c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
902c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
90339b95ed1SBarry Smith 
90439b95ed1SBarry Smith   idx   = a->j;
90539b95ed1SBarry Smith   v     = a->a;
90639b95ed1SBarry Smith   ii    = a->i;
90739b95ed1SBarry Smith 
90839b95ed1SBarry Smith 
909119a934aSSatish Balay   if (!a->mult_work) {
9103b547af2SSatish Balay     k = PetscMax(a->m,a->n);
9112b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
912119a934aSSatish Balay   }
913119a934aSSatish Balay   work = a->mult_work;
914119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
915119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
916119a934aSSatish Balay     ncols = n*bs;
917119a934aSSatish Balay     workt = work;
918119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
919119a934aSSatish Balay       xb = x + bs*(*idx++);
920119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
921119a934aSSatish Balay       workt += bs;
922119a934aSSatish Balay     }
9231a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
924119a934aSSatish Balay     v += n*bs2;
925119a934aSSatish Balay     z += bs;
926119a934aSSatish Balay   }
927c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
928c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
9291a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
930bb42667fSSatish Balay   return 0;
931bb42667fSSatish Balay }
932bb42667fSSatish Balay 
933*5615d1e5SSatish Balay #undef __FUNC__
934*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
935f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
936f44d3a6dSSatish Balay {
937f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
938f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
939c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
940f44d3a6dSSatish Balay 
941c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
942c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
943c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
944f44d3a6dSSatish Balay 
945f44d3a6dSSatish Balay   idx   = a->j;
946f44d3a6dSSatish Balay   v     = a->a;
947f44d3a6dSSatish Balay   ii    = a->i;
948f44d3a6dSSatish Balay 
949f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
950f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
951f44d3a6dSSatish Balay     sum  = y[i];
952f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
953f44d3a6dSSatish Balay     z[i] = sum;
954f44d3a6dSSatish Balay   }
955c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
956c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
957c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
958f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
959f44d3a6dSSatish Balay   return 0;
960f44d3a6dSSatish Balay }
961f44d3a6dSSatish Balay 
962*5615d1e5SSatish Balay #undef __FUNC__
963*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
964f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
965f44d3a6dSSatish Balay {
966f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
967f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
968f44d3a6dSSatish Balay   register Scalar x1,x2;
969f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
970c16cb8f2SBarry Smith   int             j,n;
971f44d3a6dSSatish Balay 
972c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
973c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
974c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
975f44d3a6dSSatish Balay 
976f44d3a6dSSatish Balay   idx   = a->j;
977f44d3a6dSSatish Balay   v     = a->a;
978f44d3a6dSSatish Balay   ii    = a->i;
979f44d3a6dSSatish Balay 
980f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
981f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
982f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
983f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
984f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
985f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
986f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
987f44d3a6dSSatish Balay       v += 4;
988f44d3a6dSSatish Balay     }
989f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
990f44d3a6dSSatish Balay     z += 2; y += 2;
991f44d3a6dSSatish Balay   }
992c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
993c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
994c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
995f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
996f44d3a6dSSatish Balay   return 0;
997f44d3a6dSSatish Balay }
998f44d3a6dSSatish Balay 
999*5615d1e5SSatish Balay #undef __FUNC__
1000*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1001f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1002f44d3a6dSSatish Balay {
1003f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1004f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1005c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1006f44d3a6dSSatish Balay 
1007c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1008c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1009c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1010f44d3a6dSSatish Balay 
1011f44d3a6dSSatish Balay   idx   = a->j;
1012f44d3a6dSSatish Balay   v     = a->a;
1013f44d3a6dSSatish Balay   ii    = a->i;
1014f44d3a6dSSatish Balay 
1015f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1016f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1017f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1018f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1019f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1020f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1021f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1022f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1023f44d3a6dSSatish Balay       v += 9;
1024f44d3a6dSSatish Balay     }
1025f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1026f44d3a6dSSatish Balay     z += 3; y += 3;
1027f44d3a6dSSatish Balay   }
1028c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1029c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1030c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1031f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1032f44d3a6dSSatish Balay   return 0;
1033f44d3a6dSSatish Balay }
1034f44d3a6dSSatish Balay 
1035*5615d1e5SSatish Balay #undef __FUNC__
1036*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1037f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1038f44d3a6dSSatish Balay {
1039f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1040f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1041f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1042f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1043c16cb8f2SBarry Smith   int             j,n;
1044f44d3a6dSSatish Balay 
1045c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1046c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1047c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1048f44d3a6dSSatish Balay 
1049f44d3a6dSSatish Balay   idx   = a->j;
1050f44d3a6dSSatish Balay   v     = a->a;
1051f44d3a6dSSatish Balay   ii    = a->i;
1052f44d3a6dSSatish Balay 
1053f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1054f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1055f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1056f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1057f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1058f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1059f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1060f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1061f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1062f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1063f44d3a6dSSatish Balay       v += 16;
1064f44d3a6dSSatish Balay     }
1065f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1066f44d3a6dSSatish Balay     z += 4; y += 4;
1067f44d3a6dSSatish Balay   }
1068c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1069c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1070c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1071f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1072f44d3a6dSSatish Balay   return 0;
1073f44d3a6dSSatish Balay }
1074f44d3a6dSSatish Balay 
1075*5615d1e5SSatish Balay #undef __FUNC__
1076*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1077f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1078f44d3a6dSSatish Balay {
1079f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1080f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1081f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1082c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1083f44d3a6dSSatish Balay 
1084c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1085c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1086c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1087f44d3a6dSSatish Balay 
1088f44d3a6dSSatish Balay   idx   = a->j;
1089f44d3a6dSSatish Balay   v     = a->a;
1090f44d3a6dSSatish Balay   ii    = a->i;
1091f44d3a6dSSatish Balay 
1092f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1093f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1094f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1095f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1096f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1097f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1098f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1099f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1100f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1101f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1102f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1103f44d3a6dSSatish Balay       v += 25;
1104f44d3a6dSSatish Balay     }
1105f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1106f44d3a6dSSatish Balay     z += 5; y += 5;
1107f44d3a6dSSatish Balay   }
1108c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1109c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1110c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1111f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1112f44d3a6dSSatish Balay   return 0;
1113f44d3a6dSSatish Balay }
1114f44d3a6dSSatish Balay 
1115*5615d1e5SSatish Balay #undef __FUNC__
1116*5615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1117f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1118f44d3a6dSSatish Balay {
1119f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1120f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1121f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1122f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1123f44d3a6dSSatish Balay 
1124f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1125f44d3a6dSSatish Balay 
1126c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1127c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1128f44d3a6dSSatish Balay 
1129f44d3a6dSSatish Balay   idx   = a->j;
1130f44d3a6dSSatish Balay   v     = a->a;
1131f44d3a6dSSatish Balay   ii    = a->i;
1132f44d3a6dSSatish Balay 
1133f44d3a6dSSatish Balay 
1134f44d3a6dSSatish Balay   if (!a->mult_work) {
1135f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1136f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1137f44d3a6dSSatish Balay   }
1138f44d3a6dSSatish Balay   work = a->mult_work;
1139f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1140f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1141f44d3a6dSSatish Balay     ncols = n*bs;
1142f44d3a6dSSatish Balay     workt = work;
1143f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1144f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1145f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1146f44d3a6dSSatish Balay       workt += bs;
1147f44d3a6dSSatish Balay     }
1148f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1149f44d3a6dSSatish Balay     v += n*bs2;
1150f44d3a6dSSatish Balay     z += bs;
1151f44d3a6dSSatish Balay   }
1152c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1153c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1154f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1155f44d3a6dSSatish Balay   return 0;
1156f44d3a6dSSatish Balay }
1157f44d3a6dSSatish Balay 
1158*5615d1e5SSatish Balay #undef __FUNC__
1159*5615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
11601a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1161bb42667fSSatish Balay {
1162bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
11631a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1164bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1165bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11669df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1167119a934aSSatish Balay 
1168119a934aSSatish Balay 
116990f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
117090f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1171bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1172bb42667fSSatish Balay 
1173119a934aSSatish Balay   idx   = a->j;
1174119a934aSSatish Balay   v     = a->a;
1175119a934aSSatish Balay   ii    = a->i;
1176119a934aSSatish Balay 
1177119a934aSSatish Balay   switch (bs) {
1178119a934aSSatish Balay   case 1:
1179119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1180119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1181119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1182119a934aSSatish Balay       ib = idx + ai[i];
1183119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1184bb1453f0SSatish Balay         rval    = ib[j];
1185bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1186119a934aSSatish Balay       }
1187119a934aSSatish Balay     }
1188119a934aSSatish Balay     break;
1189119a934aSSatish Balay   case 2:
1190119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1191119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1192119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1193119a934aSSatish Balay       ib = idx + ai[i];
1194119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1195119a934aSSatish Balay         rval      = ib[j]*2;
1196bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1197bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1198119a934aSSatish Balay         v += 4;
1199119a934aSSatish Balay       }
1200119a934aSSatish Balay     }
1201119a934aSSatish Balay     break;
1202119a934aSSatish Balay   case 3:
1203119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1204119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1205119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1206119a934aSSatish Balay       ib = idx + ai[i];
1207119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1208119a934aSSatish Balay         rval      = ib[j]*3;
1209bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1210bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1211bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1212119a934aSSatish Balay         v += 9;
1213119a934aSSatish Balay       }
1214119a934aSSatish Balay     }
1215119a934aSSatish Balay     break;
1216119a934aSSatish Balay   case 4:
1217119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1218119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1219119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1220119a934aSSatish Balay       ib = idx + ai[i];
1221119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1222119a934aSSatish Balay         rval      = ib[j]*4;
1223bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1224bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1225bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1226bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1227119a934aSSatish Balay         v += 16;
1228119a934aSSatish Balay       }
1229119a934aSSatish Balay     }
1230119a934aSSatish Balay     break;
1231119a934aSSatish Balay   case 5:
1232119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1233119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1234119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1235119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1236119a934aSSatish Balay       ib = idx + ai[i];
1237119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1238119a934aSSatish Balay         rval      = ib[j]*5;
1239bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1240bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1241bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1242bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1243bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1244119a934aSSatish Balay         v += 25;
1245119a934aSSatish Balay       }
1246119a934aSSatish Balay     }
1247119a934aSSatish Balay     break;
1248119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1249119a934aSSatish Balay     default: {
1250119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1251119a934aSSatish Balay       if (!a->mult_work) {
12523b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1253bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1254119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1255119a934aSSatish Balay       }
1256119a934aSSatish Balay       work = a->mult_work;
1257119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1258119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1259119a934aSSatish Balay         ncols = n*bs;
1260119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1261119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1262119a934aSSatish Balay         v += n*bs2;
1263119a934aSSatish Balay         x += bs;
1264119a934aSSatish Balay         workt = work;
1265119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1266119a934aSSatish Balay           zb = z + bs*(*idx++);
1267bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1268119a934aSSatish Balay           workt += bs;
1269119a934aSSatish Balay         }
1270119a934aSSatish Balay       }
1271119a934aSSatish Balay     }
1272119a934aSSatish Balay   }
12730419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12740419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1275faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1276faf6580fSSatish Balay   return 0;
1277faf6580fSSatish Balay }
12781c351548SSatish Balay 
1279*5615d1e5SSatish Balay #undef __FUNC__
1280*5615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1281faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1282faf6580fSSatish Balay {
1283faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1284faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1285faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1286faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1287faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1288faf6580fSSatish Balay 
1289faf6580fSSatish Balay 
1290faf6580fSSatish Balay 
129190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
129290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1293faf6580fSSatish Balay 
1294648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1295648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1296648c1d95SSatish Balay 
1297faf6580fSSatish Balay 
1298faf6580fSSatish Balay   idx   = a->j;
1299faf6580fSSatish Balay   v     = a->a;
1300faf6580fSSatish Balay   ii    = a->i;
1301faf6580fSSatish Balay 
1302faf6580fSSatish Balay   switch (bs) {
1303faf6580fSSatish Balay   case 1:
1304faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1305faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1306faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1307faf6580fSSatish Balay       ib = idx + ai[i];
1308faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1309faf6580fSSatish Balay         rval    = ib[j];
1310faf6580fSSatish Balay         z[rval] += *v++ * x1;
1311faf6580fSSatish Balay       }
1312faf6580fSSatish Balay     }
1313faf6580fSSatish Balay     break;
1314faf6580fSSatish Balay   case 2:
1315faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1316faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1317faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1318faf6580fSSatish Balay       ib = idx + ai[i];
1319faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1320faf6580fSSatish Balay         rval      = ib[j]*2;
1321faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1322faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1323faf6580fSSatish Balay         v += 4;
1324faf6580fSSatish Balay       }
1325faf6580fSSatish Balay     }
1326faf6580fSSatish Balay     break;
1327faf6580fSSatish Balay   case 3:
1328faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1329faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1330faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1331faf6580fSSatish Balay       ib = idx + ai[i];
1332faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1333faf6580fSSatish Balay         rval      = ib[j]*3;
1334faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1335faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1336faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1337faf6580fSSatish Balay         v += 9;
1338faf6580fSSatish Balay       }
1339faf6580fSSatish Balay     }
1340faf6580fSSatish Balay     break;
1341faf6580fSSatish Balay   case 4:
1342faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1343faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1344faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1345faf6580fSSatish Balay       ib = idx + ai[i];
1346faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1347faf6580fSSatish Balay         rval      = ib[j]*4;
1348faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1349faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1350faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1351faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1352faf6580fSSatish Balay         v += 16;
1353faf6580fSSatish Balay       }
1354faf6580fSSatish Balay     }
1355faf6580fSSatish Balay     break;
1356faf6580fSSatish Balay   case 5:
1357faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1358faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1359faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1360faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1361faf6580fSSatish Balay       ib = idx + ai[i];
1362faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1363faf6580fSSatish Balay         rval      = ib[j]*5;
1364faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1365faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1366faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1367faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1368faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1369faf6580fSSatish Balay         v += 25;
1370faf6580fSSatish Balay       }
1371faf6580fSSatish Balay     }
1372faf6580fSSatish Balay     break;
1373faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1374faf6580fSSatish Balay     default: {
1375faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1376faf6580fSSatish Balay       if (!a->mult_work) {
1377faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1378faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1379faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1380faf6580fSSatish Balay       }
1381faf6580fSSatish Balay       work = a->mult_work;
1382faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1383faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1384faf6580fSSatish Balay         ncols = n*bs;
1385faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1386faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1387faf6580fSSatish Balay         v += n*bs2;
1388faf6580fSSatish Balay         x += bs;
1389faf6580fSSatish Balay         workt = work;
1390faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1391faf6580fSSatish Balay           zb = z + bs*(*idx++);
1392faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1393faf6580fSSatish Balay           workt += bs;
1394faf6580fSSatish Balay         }
1395faf6580fSSatish Balay       }
1396faf6580fSSatish Balay     }
1397faf6580fSSatish Balay   }
1398faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1399faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1400faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
14012593348eSBarry Smith   return 0;
14022593348eSBarry Smith }
14032593348eSBarry Smith 
1404*5615d1e5SSatish Balay #undef __FUNC__
1405*5615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ"
14064e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
14072593348eSBarry Smith {
1408b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14094e220ebcSLois Curfman McInnes 
14104e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
14114e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
14124e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
14134e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
14144e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
14154e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
14164e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
14174e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
14184e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
14194e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
14204e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
14214e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
14224e220ebcSLois Curfman McInnes   info->memory       = A->mem;
14234e220ebcSLois Curfman McInnes   if (A->factor) {
14244e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
14254e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
14264e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
14274e220ebcSLois Curfman McInnes   } else {
14284e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
14294e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
14304e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
14314e220ebcSLois Curfman McInnes   }
14322593348eSBarry Smith   return 0;
14332593348eSBarry Smith }
14342593348eSBarry Smith 
1435*5615d1e5SSatish Balay #undef __FUNC__
1436*5615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
143791d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
143891d316f6SSatish Balay {
143991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
144091d316f6SSatish Balay 
1441e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
144291d316f6SSatish Balay 
144391d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
144491d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1445a7c10996SSatish Balay       (a->nz != b->nz)) {
144691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
144791d316f6SSatish Balay   }
144891d316f6SSatish Balay 
144991d316f6SSatish Balay   /* if the a->i are the same */
145091d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
145191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
145291d316f6SSatish Balay   }
145391d316f6SSatish Balay 
145491d316f6SSatish Balay   /* if a->j are the same */
145591d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
145691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
145791d316f6SSatish Balay   }
145891d316f6SSatish Balay 
145991d316f6SSatish Balay   /* if a->a are the same */
146091d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
146191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
146291d316f6SSatish Balay   }
146391d316f6SSatish Balay   *flg = PETSC_TRUE;
146491d316f6SSatish Balay   return 0;
146591d316f6SSatish Balay 
146691d316f6SSatish Balay }
146791d316f6SSatish Balay 
1468*5615d1e5SSatish Balay #undef __FUNC__
1469*5615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
147091d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
147191d316f6SSatish Balay {
147291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14737e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
147417e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
147517e48fc4SSatish Balay 
147617e48fc4SSatish Balay   bs   = a->bs;
147717e48fc4SSatish Balay   aa   = a->a;
147817e48fc4SSatish Balay   ai   = a->i;
147917e48fc4SSatish Balay   aj   = a->j;
148017e48fc4SSatish Balay   ambs = a->mbs;
14819df24120SSatish Balay   bs2  = a->bs2;
148291d316f6SSatish Balay 
148391d316f6SSatish Balay   VecSet(&zero,v);
148490f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1485e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
148617e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
148717e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
148817e48fc4SSatish Balay       if (aj[j] == i) {
148917e48fc4SSatish Balay         row  = i*bs;
14907e67e3f9SSatish Balay         aa_j = aa+j*bs2;
14917e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
149291d316f6SSatish Balay         break;
149391d316f6SSatish Balay       }
149491d316f6SSatish Balay     }
149591d316f6SSatish Balay   }
149691d316f6SSatish Balay   return 0;
149791d316f6SSatish Balay }
149891d316f6SSatish Balay 
1499*5615d1e5SSatish Balay #undef __FUNC__
1500*5615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
15019867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
15029867e207SSatish Balay {
15039867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15049867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
15057e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
15069867e207SSatish Balay 
15079867e207SSatish Balay   ai  = a->i;
15089867e207SSatish Balay   aj  = a->j;
15099867e207SSatish Balay   aa  = a->a;
15109867e207SSatish Balay   m   = a->m;
15119867e207SSatish Balay   n   = a->n;
15129867e207SSatish Balay   bs  = a->bs;
15139867e207SSatish Balay   mbs = a->mbs;
15149df24120SSatish Balay   bs2 = a->bs2;
15159867e207SSatish Balay   if (ll) {
151690f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1517e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
15189867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
15199867e207SSatish Balay       M  = ai[i+1] - ai[i];
15209867e207SSatish Balay       li = l + i*bs;
15217e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
15229867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
15237e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
15249867e207SSatish Balay           (*v++) *= li[k%bs];
15259867e207SSatish Balay         }
15269867e207SSatish Balay       }
15279867e207SSatish Balay     }
15289867e207SSatish Balay   }
15299867e207SSatish Balay 
15309867e207SSatish Balay   if (rr) {
153190f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1532e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
15339867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
15349867e207SSatish Balay       M  = ai[i+1] - ai[i];
15357e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
15369867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
15379867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
15389867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
15399867e207SSatish Balay           x = ri[k];
15409867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
15419867e207SSatish Balay         }
15429867e207SSatish Balay       }
15439867e207SSatish Balay     }
15449867e207SSatish Balay   }
15459867e207SSatish Balay   return 0;
15469867e207SSatish Balay }
15479867e207SSatish Balay 
15489867e207SSatish Balay 
1549b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1550b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
15512a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1552736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1553736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
15541a6a6d98SBarry Smith 
15551a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
15561a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
15571a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
15581a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
15591a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
15601a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
15611a6a6d98SBarry Smith 
15627fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
15637fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
15647fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
15657fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
15667fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
15677fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
15682593348eSBarry Smith 
1569*5615d1e5SSatish Balay #undef __FUNC__
1570*5615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
1571b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
15722593348eSBarry Smith {
1573b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15742593348eSBarry Smith   Scalar      *v = a->a;
15752593348eSBarry Smith   double      sum = 0.0;
15769df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
15772593348eSBarry Smith 
15782593348eSBarry Smith   if (type == NORM_FROBENIUS) {
15790419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
15802593348eSBarry Smith #if defined(PETSC_COMPLEX)
15812593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
15822593348eSBarry Smith #else
15832593348eSBarry Smith       sum += (*v)*(*v); v++;
15842593348eSBarry Smith #endif
15852593348eSBarry Smith     }
15862593348eSBarry Smith     *norm = sqrt(sum);
15872593348eSBarry Smith   }
15882593348eSBarry Smith   else {
1589e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
15902593348eSBarry Smith   }
15912593348eSBarry Smith   return 0;
15922593348eSBarry Smith }
15932593348eSBarry Smith 
15942593348eSBarry Smith /*
15952593348eSBarry Smith      note: This can only work for identity for row and col. It would
15962593348eSBarry Smith    be good to check this and otherwise generate an error.
15972593348eSBarry Smith */
1598*5615d1e5SSatish Balay #undef __FUNC__
1599*5615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
1600b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
16012593348eSBarry Smith {
1602b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
16032593348eSBarry Smith   Mat         outA;
1604de6a44a3SBarry Smith   int         ierr;
16052593348eSBarry Smith 
1606e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
16072593348eSBarry Smith 
16082593348eSBarry Smith   outA          = inA;
16092593348eSBarry Smith   inA->factor   = FACTOR_LU;
16102593348eSBarry Smith   a->row        = row;
16112593348eSBarry Smith   a->col        = col;
16122593348eSBarry Smith 
1613de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
16142593348eSBarry Smith 
16152593348eSBarry Smith   if (!a->diag) {
1616b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
16172593348eSBarry Smith   }
16187fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
16192593348eSBarry Smith   return 0;
16202593348eSBarry Smith }
16212593348eSBarry Smith 
1622*5615d1e5SSatish Balay #undef __FUNC__
1623*5615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
1624b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
16252593348eSBarry Smith {
1626b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
16279df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1628b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1629b6490206SBarry Smith   PLogFlops(totalnz);
16302593348eSBarry Smith   return 0;
16312593348eSBarry Smith }
16322593348eSBarry Smith 
1633*5615d1e5SSatish Balay #undef __FUNC__
1634*5615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
16357e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
16367e67e3f9SSatish Balay {
16377e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16387e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1639a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
16409df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
16417e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
16427e67e3f9SSatish Balay 
16437e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
16447e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1645e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1646e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1647a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
16487e67e3f9SSatish Balay     nrow = ailen[brow];
16497e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1650e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1651e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1652a7c10996SSatish Balay       col  = in[l] ;
16537e67e3f9SSatish Balay       bcol = col/bs;
16547e67e3f9SSatish Balay       cidx = col%bs;
16557e67e3f9SSatish Balay       ridx = row%bs;
16567e67e3f9SSatish Balay       high = nrow;
16577e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
16587e67e3f9SSatish Balay       while (high-low > 5) {
16597e67e3f9SSatish Balay         t = (low+high)/2;
16607e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
16617e67e3f9SSatish Balay         else             low  = t;
16627e67e3f9SSatish Balay       }
16637e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
16647e67e3f9SSatish Balay         if (rp[i] > bcol) break;
16657e67e3f9SSatish Balay         if (rp[i] == bcol) {
16667e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
16677e67e3f9SSatish Balay           goto finished;
16687e67e3f9SSatish Balay         }
16697e67e3f9SSatish Balay       }
16707e67e3f9SSatish Balay       *v++ = zero;
16717e67e3f9SSatish Balay       finished:;
16727e67e3f9SSatish Balay     }
16737e67e3f9SSatish Balay   }
16747e67e3f9SSatish Balay   return 0;
16757e67e3f9SSatish Balay }
16767e67e3f9SSatish Balay 
1677*5615d1e5SSatish Balay #undef __FUNC__
1678*5615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
16795a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
16805a838052SSatish Balay {
16815a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
16825a838052SSatish Balay   *bs = baij->bs;
16835a838052SSatish Balay   return 0;
16845a838052SSatish Balay }
16855a838052SSatish Balay 
1686d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1687*5615d1e5SSatish Balay #undef __FUNC__
1688*5615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1689d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1690d9b7c43dSSatish Balay {
1691d9b7c43dSSatish Balay   int i,row;
1692d9b7c43dSSatish Balay   row = idx[0];
1693d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1694d9b7c43dSSatish Balay 
1695d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1696d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1697d9b7c43dSSatish Balay   }
1698d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1699d9b7c43dSSatish Balay   return 0;
1700d9b7c43dSSatish Balay }
1701d9b7c43dSSatish Balay 
1702*5615d1e5SSatish Balay #undef __FUNC__
1703*5615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
1704d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1705d9b7c43dSSatish Balay {
1706d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1707d9b7c43dSSatish Balay   IS          is_local;
1708d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1709d9b7c43dSSatish Balay   PetscTruth  flg;
1710d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1711d9b7c43dSSatish Balay 
1712d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1713d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1714d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1715537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1716d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1717d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1718d9b7c43dSSatish Balay 
1719d9b7c43dSSatish Balay   i = 0;
1720d9b7c43dSSatish Balay   while (i < is_n) {
1721e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1722d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1723d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1724d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1725d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1726d9b7c43dSSatish Balay       i += bs;
1727d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1728d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1729d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1730d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1731d9b7c43dSSatish Balay         aa[0] = zero;
1732d9b7c43dSSatish Balay         aa+=bs;
1733d9b7c43dSSatish Balay       }
1734d9b7c43dSSatish Balay       i++;
1735d9b7c43dSSatish Balay     }
1736d9b7c43dSSatish Balay   }
1737d9b7c43dSSatish Balay   if (diag) {
1738d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1739d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1740d9b7c43dSSatish Balay     }
1741d9b7c43dSSatish Balay   }
1742d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1743d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1744d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
17459a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1746d9b7c43dSSatish Balay 
1747d9b7c43dSSatish Balay   return 0;
1748d9b7c43dSSatish Balay }
17491c351548SSatish Balay 
1750*5615d1e5SSatish Balay #undef __FUNC__
1751*5615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1752ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1753ba4ca20aSSatish Balay {
1754ba4ca20aSSatish Balay   static int called = 0;
1755ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1756ba4ca20aSSatish Balay 
1757ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1758ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1759ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1760ba4ca20aSSatish Balay   return 0;
1761ba4ca20aSSatish Balay }
1762d9b7c43dSSatish Balay 
17632593348eSBarry Smith /* -------------------------------------------------------------------*/
1764cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
17659867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1766f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1767faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
17681a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1769b6490206SBarry Smith        0,0,
1770de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1771b6490206SBarry Smith        0,
1772f2501298SSatish Balay        MatTranspose_SeqBAIJ,
177317e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
17749867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1775584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1776b6490206SBarry Smith        0,
1777d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
17787fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1779b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1780de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1781b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1782b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1783b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1784af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
17857e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1786ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
17873b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
17883b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
17893b2fbd54SBarry Smith        MatRestoreRowIJ_SeqBAIJ};
17902593348eSBarry Smith 
1791*5615d1e5SSatish Balay #undef __FUNC__
1792*5615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
17932593348eSBarry Smith /*@C
179444cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
179544cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
179644cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
17972bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
17982bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
17992593348eSBarry Smith 
18002593348eSBarry Smith    Input Parameters:
18012593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1802b6490206SBarry Smith .  bs - size of block
18032593348eSBarry Smith .  m - number of rows
18042593348eSBarry Smith .  n - number of columns
1805b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
18062bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
18072bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
18082593348eSBarry Smith 
18092593348eSBarry Smith    Output Parameter:
18102593348eSBarry Smith .  A - the matrix
18112593348eSBarry Smith 
18122593348eSBarry Smith    Notes:
181344cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
18142593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
181544cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
18162593348eSBarry Smith 
18172593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
18182593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
18192593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
18206da5968aSLois Curfman McInnes    matrices.
18212593348eSBarry Smith 
182244cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
18232593348eSBarry Smith @*/
1824b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
18252593348eSBarry Smith {
18262593348eSBarry Smith   Mat         B;
1827b6490206SBarry Smith   Mat_SeqBAIJ *b;
18283b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
18293b2fbd54SBarry Smith 
18303b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
1831e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
1832b6490206SBarry Smith 
1833f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1834e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
18352593348eSBarry Smith 
18362593348eSBarry Smith   *A = 0;
1837b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
18382593348eSBarry Smith   PLogObjectCreate(B);
1839b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
184044cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
18412593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
18421a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
18431a6a6d98SBarry Smith   if (!flg) {
18447fc0212eSBarry Smith     switch (bs) {
18457fc0212eSBarry Smith       case 1:
18467fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18471a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
184839b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1849f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
18507fc0212eSBarry Smith        break;
18514eeb42bcSBarry Smith       case 2:
18524eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
18531a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
185439b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1855f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
18566d84be18SBarry Smith         break;
1857f327dd97SBarry Smith       case 3:
1858f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
18591a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
186039b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1861f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
18624eeb42bcSBarry Smith         break;
18636d84be18SBarry Smith       case 4:
18646d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
18651a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
186639b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1867f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
18686d84be18SBarry Smith         break;
18696d84be18SBarry Smith       case 5:
18706d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
18711a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
187239b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1873f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
18746d84be18SBarry Smith         break;
18757fc0212eSBarry Smith     }
18761a6a6d98SBarry Smith   }
1877b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1878b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
18792593348eSBarry Smith   B->factor           = 0;
18802593348eSBarry Smith   B->lupivotthreshold = 1.0;
188190f02eecSBarry Smith   B->mapping          = 0;
18822593348eSBarry Smith   b->row              = 0;
18832593348eSBarry Smith   b->col              = 0;
18842593348eSBarry Smith   b->reallocs         = 0;
18852593348eSBarry Smith 
188644cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
188744cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1888b6490206SBarry Smith   b->mbs     = mbs;
1889f2501298SSatish Balay   b->nbs     = nbs;
1890b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
18912593348eSBarry Smith   if (nnz == PETSC_NULL) {
1892119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
18932593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1894b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1895b6490206SBarry Smith     nz = nz*mbs;
18962593348eSBarry Smith   }
18972593348eSBarry Smith   else {
18982593348eSBarry Smith     nz = 0;
1899b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
19002593348eSBarry Smith   }
19012593348eSBarry Smith 
19022593348eSBarry Smith   /* allocate the matrix space */
19037e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
19042593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
19057e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
19067e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
19072593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
19082593348eSBarry Smith   b->i  = b->j + nz;
19092593348eSBarry Smith   b->singlemalloc = 1;
19102593348eSBarry Smith 
1911de6a44a3SBarry Smith   b->i[0] = 0;
1912b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
19132593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
19142593348eSBarry Smith   }
19152593348eSBarry Smith 
1916b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1917b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1918b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1919b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
19202593348eSBarry Smith 
1921b6490206SBarry Smith   b->bs               = bs;
19229df24120SSatish Balay   b->bs2              = bs2;
1923b6490206SBarry Smith   b->mbs              = mbs;
19242593348eSBarry Smith   b->nz               = 0;
192518e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
19262593348eSBarry Smith   b->sorted           = 0;
19272593348eSBarry Smith   b->roworiented      = 1;
19282593348eSBarry Smith   b->nonew            = 0;
19292593348eSBarry Smith   b->diag             = 0;
19302593348eSBarry Smith   b->solve_work       = 0;
1931de6a44a3SBarry Smith   b->mult_work        = 0;
19322593348eSBarry Smith   b->spptr            = 0;
19334e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
19344e220ebcSLois Curfman McInnes 
19352593348eSBarry Smith   *A = B;
19362593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
19372593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
19382593348eSBarry Smith   return 0;
19392593348eSBarry Smith }
19402593348eSBarry Smith 
1941*5615d1e5SSatish Balay #undef __FUNC__
1942*5615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
1943b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
19442593348eSBarry Smith {
19452593348eSBarry Smith   Mat         C;
1946b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
19479df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1948de6a44a3SBarry Smith 
1949e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
19502593348eSBarry Smith 
19512593348eSBarry Smith   *B = 0;
1952b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
19532593348eSBarry Smith   PLogObjectCreate(C);
1954b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
19552593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1956b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1957b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
19582593348eSBarry Smith   C->factor     = A->factor;
19592593348eSBarry Smith   c->row        = 0;
19602593348eSBarry Smith   c->col        = 0;
19612593348eSBarry Smith   C->assembled  = PETSC_TRUE;
19622593348eSBarry Smith 
196344cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
196444cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
196544cd7ae7SLois Curfman McInnes   C->M          = a->m;
196644cd7ae7SLois Curfman McInnes   C->N          = a->n;
196744cd7ae7SLois Curfman McInnes 
1968b6490206SBarry Smith   c->bs         = a->bs;
19699df24120SSatish Balay   c->bs2        = a->bs2;
1970b6490206SBarry Smith   c->mbs        = a->mbs;
1971b6490206SBarry Smith   c->nbs        = a->nbs;
19722593348eSBarry Smith 
1973b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1974b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1975b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
19762593348eSBarry Smith     c->imax[i] = a->imax[i];
19772593348eSBarry Smith     c->ilen[i] = a->ilen[i];
19782593348eSBarry Smith   }
19792593348eSBarry Smith 
19802593348eSBarry Smith   /* allocate the matrix space */
19812593348eSBarry Smith   c->singlemalloc = 1;
19827e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
19832593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
19847e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1985de6a44a3SBarry Smith   c->i  = c->j + nz;
1986b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1987b6490206SBarry Smith   if (mbs > 0) {
1988de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
19892593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
19907e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
19912593348eSBarry Smith     }
19922593348eSBarry Smith   }
19932593348eSBarry Smith 
1994b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
19952593348eSBarry Smith   c->sorted      = a->sorted;
19962593348eSBarry Smith   c->roworiented = a->roworiented;
19972593348eSBarry Smith   c->nonew       = a->nonew;
19982593348eSBarry Smith 
19992593348eSBarry Smith   if (a->diag) {
2000b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2001b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2002b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
20032593348eSBarry Smith       c->diag[i] = a->diag[i];
20042593348eSBarry Smith     }
20052593348eSBarry Smith   }
20062593348eSBarry Smith   else c->diag          = 0;
20072593348eSBarry Smith   c->nz                 = a->nz;
20082593348eSBarry Smith   c->maxnz              = a->maxnz;
20092593348eSBarry Smith   c->solve_work         = 0;
20102593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
20117fc0212eSBarry Smith   c->mult_work          = 0;
20122593348eSBarry Smith   *B = C;
20132593348eSBarry Smith   return 0;
20142593348eSBarry Smith }
20152593348eSBarry Smith 
2016*5615d1e5SSatish Balay #undef __FUNC__
2017*5615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
201819bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
20192593348eSBarry Smith {
2020b6490206SBarry Smith   Mat_SeqBAIJ  *a;
20212593348eSBarry Smith   Mat          B;
2022de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2023b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
202435aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2025a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2026b6490206SBarry Smith   Scalar       *aa;
202719bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
20282593348eSBarry Smith 
20291a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2030de6a44a3SBarry Smith   bs2  = bs*bs;
2031b6490206SBarry Smith 
20322593348eSBarry Smith   MPI_Comm_size(comm,&size);
2033e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
203490ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
203577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2036e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
20372593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
20382593348eSBarry Smith 
2039e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
204035aab85fSBarry Smith 
204135aab85fSBarry Smith   /*
204235aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
204335aab85fSBarry Smith     divisible by the blocksize
204435aab85fSBarry Smith   */
2045b6490206SBarry Smith   mbs        = M/bs;
204635aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
204735aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
204835aab85fSBarry Smith   else                  mbs++;
204935aab85fSBarry Smith   if (extra_rows) {
2050537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
205135aab85fSBarry Smith   }
2052b6490206SBarry Smith 
20532593348eSBarry Smith   /* read in row lengths */
205435aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
205577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
205635aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
20572593348eSBarry Smith 
2058b6490206SBarry Smith   /* read in column indices */
205935aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
206077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
206135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2062b6490206SBarry Smith 
2063b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2064b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2065b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
206635aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
206735aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
206835aab85fSBarry Smith   masked = mask + mbs;
2069b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2070b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
207135aab85fSBarry Smith     nmask = 0;
2072b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2073b6490206SBarry Smith       kmax = rowlengths[rowcount];
2074b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
207535aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
207635aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2077b6490206SBarry Smith       }
2078b6490206SBarry Smith       rowcount++;
2079b6490206SBarry Smith     }
208035aab85fSBarry Smith     browlengths[i] += nmask;
208135aab85fSBarry Smith     /* zero out the mask elements we set */
208235aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2083b6490206SBarry Smith   }
2084b6490206SBarry Smith 
20852593348eSBarry Smith   /* create our matrix */
208635aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
208735aab85fSBarry Smith          CHKERRQ(ierr);
20882593348eSBarry Smith   B = *A;
2089b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
20902593348eSBarry Smith 
20912593348eSBarry Smith   /* set matrix "i" values */
2092de6a44a3SBarry Smith   a->i[0] = 0;
2093b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2094b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2095b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
20962593348eSBarry Smith   }
2097b6490206SBarry Smith   a->nz         = 0;
2098b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
20992593348eSBarry Smith 
2100b6490206SBarry Smith   /* read in nonzero values */
210135aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
210277c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
210335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2104b6490206SBarry Smith 
2105b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2106b6490206SBarry Smith   nzcount = 0; jcount = 0;
2107b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2108b6490206SBarry Smith     nzcountb = nzcount;
210935aab85fSBarry Smith     nmask    = 0;
2110b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2111b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2112b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
211335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
211435aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2115b6490206SBarry Smith       }
2116b6490206SBarry Smith       rowcount++;
2117b6490206SBarry Smith     }
2118de6a44a3SBarry Smith     /* sort the masked values */
211977c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2120de6a44a3SBarry Smith 
2121b6490206SBarry Smith     /* set "j" values into matrix */
2122b6490206SBarry Smith     maskcount = 1;
212335aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
212435aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2125de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2126b6490206SBarry Smith     }
2127b6490206SBarry Smith     /* set "a" values into matrix */
2128de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2129b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2130b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2131b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2132de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2133de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2134de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2135de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2136b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2137b6490206SBarry Smith       }
2138b6490206SBarry Smith     }
213935aab85fSBarry Smith     /* zero out the mask elements we set */
214035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2141b6490206SBarry Smith   }
2142e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2143b6490206SBarry Smith 
2144b6490206SBarry Smith   PetscFree(rowlengths);
2145b6490206SBarry Smith   PetscFree(browlengths);
2146b6490206SBarry Smith   PetscFree(aa);
2147b6490206SBarry Smith   PetscFree(jj);
2148b6490206SBarry Smith   PetscFree(mask);
2149b6490206SBarry Smith 
2150b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2151de6a44a3SBarry Smith 
2152de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2153de6a44a3SBarry Smith   if (flg) {
215419bcc07fSBarry Smith     Viewer tviewer;
215519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2156639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
215719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
215819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2159de6a44a3SBarry Smith   }
2160de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2161de6a44a3SBarry Smith   if (flg) {
216219bcc07fSBarry Smith     Viewer tviewer;
216319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2164639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
216519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
216619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2167de6a44a3SBarry Smith   }
2168de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2169de6a44a3SBarry Smith   if (flg) {
217019bcc07fSBarry Smith     Viewer tviewer;
217119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
217219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
217319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2174de6a44a3SBarry Smith   }
2175de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2176de6a44a3SBarry Smith   if (flg) {
217719bcc07fSBarry Smith     Viewer tviewer;
217819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2179639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
218019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
218119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2182de6a44a3SBarry Smith   }
2183de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2184de6a44a3SBarry Smith   if (flg) {
218519bcc07fSBarry Smith     Viewer tviewer;
218619bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
218719bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
218819bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
218919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2190de6a44a3SBarry Smith   }
21912593348eSBarry Smith   return 0;
21922593348eSBarry Smith }
21932593348eSBarry Smith 
21942593348eSBarry Smith 
21952593348eSBarry Smith 
2196