xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3eee16b04de4467482e6ce56cbcc0108d577ea39)
12593348eSBarry Smith #ifndef lint
2*3eee16b0SBarry Smith static char vcid[] = "$Id: baij.c,v 1.105 1997/06/18 01:44:09 curfman Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
93369ce9aSBarry Smith 
103369ce9aSBarry Smith #include "pinclude/pviewer.h"
113369ce9aSBarry Smith #include "sys.h"
1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
131a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
141a6a6d98SBarry Smith #include "src/inline/spops.h"
152593348eSBarry Smith #include "petsc.h"
163b547af2SSatish Balay 
172593348eSBarry Smith 
18de6a44a3SBarry Smith /*
19de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
20de6a44a3SBarry Smith */
21de6a44a3SBarry Smith 
225615d1e5SSatish Balay #undef __FUNC__
235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
25de6a44a3SBarry Smith {
26de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
277fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
28de6a44a3SBarry Smith 
29de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
317fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
32de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33de6a44a3SBarry Smith       if (a->j[j] == i) {
34de6a44a3SBarry Smith         diag[i] = j;
35de6a44a3SBarry Smith         break;
36de6a44a3SBarry Smith       }
37de6a44a3SBarry Smith     }
38de6a44a3SBarry Smith   }
39de6a44a3SBarry Smith   a->diag = diag;
40de6a44a3SBarry Smith   return 0;
41de6a44a3SBarry Smith }
422593348eSBarry Smith 
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
11966963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
13466963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
175766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
180766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18344cd7ae7SLois Curfman McInnes #else
18444cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18544cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18644cd7ae7SLois Curfman McInnes #endif
18744cd7ae7SLois Curfman McInnes           }
18844cd7ae7SLois Curfman McInnes         }
18944cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19044cd7ae7SLois Curfman McInnes       }
19144cd7ae7SLois Curfman McInnes     }
19244cd7ae7SLois Curfman McInnes   }
1932593348eSBarry Smith   else {
194b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
195b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
196b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
197b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
200766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
20188685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2027e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
204766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
205766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
206766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
207766eeae4SLois Curfman McInnes           }
20888685aaeSLois Curfman McInnes           else {
2097e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
21088685aaeSLois Curfman McInnes           }
21188685aaeSLois Curfman McInnes #else
2127e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
21388685aaeSLois Curfman McInnes #endif
2142593348eSBarry Smith           }
2152593348eSBarry Smith         }
2162593348eSBarry Smith         fprintf(fd,"\n");
2172593348eSBarry Smith       }
2182593348eSBarry Smith     }
219b6490206SBarry Smith   }
2202593348eSBarry Smith   fflush(fd);
2212593348eSBarry Smith   return 0;
2222593348eSBarry Smith }
2232593348eSBarry Smith 
2245615d1e5SSatish Balay #undef __FUNC__
2255eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
2263270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2273270192aSSatish Balay {
2283270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2293270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2303270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2313270192aSSatish Balay   Scalar       *aa;
2323270192aSSatish Balay   Draw         draw;
2333270192aSSatish Balay   DrawButton   button;
2343270192aSSatish Balay   PetscTruth   isnull;
2353270192aSSatish Balay 
2363270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2373270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2383270192aSSatish Balay 
2393270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2403270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2413270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2423270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2433270192aSSatish Balay   color = DRAW_BLUE;
2443270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2453270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2463270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2473270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2483270192aSSatish Balay       aa = a->a + j*bs2;
2493270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2503270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2513270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
252b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2533270192aSSatish Balay         }
2543270192aSSatish Balay       }
2553270192aSSatish Balay     }
2563270192aSSatish Balay   }
2573270192aSSatish Balay   color = DRAW_CYAN;
2583270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2593270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2603270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2613270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2623270192aSSatish Balay       aa = a->a + j*bs2;
2633270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2643270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2653270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
266b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2673270192aSSatish Balay         }
2683270192aSSatish Balay       }
2693270192aSSatish Balay     }
2703270192aSSatish Balay   }
2713270192aSSatish Balay 
2723270192aSSatish Balay   color = DRAW_RED;
2733270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2743270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2753270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2763270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2773270192aSSatish Balay       aa = a->a + j*bs2;
2783270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2793270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2803270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
281b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2823270192aSSatish Balay         }
2833270192aSSatish Balay       }
2843270192aSSatish Balay     }
2853270192aSSatish Balay   }
2863270192aSSatish Balay 
2873270192aSSatish Balay   DrawFlush(draw);
2883270192aSSatish Balay   DrawGetPause(draw,&pause);
2893270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2903270192aSSatish Balay 
2913270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2923270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2933270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2943270192aSSatish Balay     DrawClear(draw);
2953270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2963270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2973270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2983270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2993270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
3003270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
3013270192aSSatish Balay     w *= scale; h *= scale;
3023270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
3033270192aSSatish Balay     color = DRAW_BLUE;
3043270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3053270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3063270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3073270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3083270192aSSatish Balay         aa = a->a + j*bs2;
3093270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3103270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3113270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
312b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3133270192aSSatish Balay           }
3143270192aSSatish Balay         }
3153270192aSSatish Balay       }
3163270192aSSatish Balay     }
3173270192aSSatish Balay     color = DRAW_CYAN;
3183270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3193270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3203270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3213270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3223270192aSSatish Balay         aa = a->a + j*bs2;
3233270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3243270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3253270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
326b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3273270192aSSatish Balay           }
3283270192aSSatish Balay         }
3293270192aSSatish Balay       }
3303270192aSSatish Balay     }
3313270192aSSatish Balay 
3323270192aSSatish Balay     color = DRAW_RED;
3333270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3343270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3353270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3363270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3373270192aSSatish Balay         aa = a->a + j*bs2;
3383270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3393270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3403270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
341b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3423270192aSSatish Balay           }
3433270192aSSatish Balay         }
3443270192aSSatish Balay       }
3453270192aSSatish Balay     }
3463270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3473270192aSSatish Balay   }
3483270192aSSatish Balay   return 0;
3493270192aSSatish Balay }
3503270192aSSatish Balay 
3515615d1e5SSatish Balay #undef __FUNC__
3525eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
3538f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3542593348eSBarry Smith {
3552593348eSBarry Smith   Mat         A = (Mat) obj;
35619bcc07fSBarry Smith   ViewerType  vtype;
35719bcc07fSBarry Smith   int         ierr;
3582593348eSBarry Smith 
35919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
361e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3622593348eSBarry Smith   }
36319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
364b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3652593348eSBarry Smith   }
36619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
367b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3682593348eSBarry Smith   }
36919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3703270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3712593348eSBarry Smith   }
3722593348eSBarry Smith   return 0;
3732593348eSBarry Smith }
374b6490206SBarry Smith 
375119a934aSSatish Balay #define CHUNKSIZE  10
376cd0e1443SSatish Balay 
3775615d1e5SSatish Balay #undef __FUNC__
3785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
379639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
380cd0e1443SSatish Balay {
381cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
382cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
383cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
384a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3859df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
386cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
387cd0e1443SSatish Balay 
388cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
389cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3903b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
391e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
392e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3933b2fbd54SBarry Smith #endif
3942c3acbe9SBarry Smith     rp   = aj + ai[brow];
3952c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3962c3acbe9SBarry Smith     rmax = imax[brow];
3972c3acbe9SBarry Smith     nrow = ailen[brow];
398cd0e1443SSatish Balay     low  = 0;
399cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
4003b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
401e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
402e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
4033b2fbd54SBarry Smith #endif
404a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
405cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
406cd0e1443SSatish Balay       if (roworiented) {
407cd0e1443SSatish Balay         value = *v++;
408cd0e1443SSatish Balay       }
409cd0e1443SSatish Balay       else {
410cd0e1443SSatish Balay         value = v[k + l*m];
411cd0e1443SSatish Balay       }
412cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4132c3acbe9SBarry Smith       while (high-low > 7) {
414cd0e1443SSatish Balay         t = (low+high)/2;
415cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
416cd0e1443SSatish Balay         else              low  = t;
417cd0e1443SSatish Balay       }
418cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
419cd0e1443SSatish Balay         if (rp[i] > bcol) break;
420cd0e1443SSatish Balay         if (rp[i] == bcol) {
4217e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
422cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
423cd0e1443SSatish Balay           else                  *bap  = value;
424f1241b54SBarry Smith           goto noinsert1;
425cd0e1443SSatish Balay         }
426cd0e1443SSatish Balay       }
427c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
42811ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
429cd0e1443SSatish Balay       if (nrow >= rmax) {
430cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
431cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
432cd0e1443SSatish Balay         Scalar *new_a;
433cd0e1443SSatish Balay 
43411ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
43596854ed6SLois Curfman McInnes 
43696854ed6SLois Curfman McInnes         /* Malloc new storage space */
4377e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
438cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4397e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
440cd0e1443SSatish Balay         new_i   = new_j + new_nz;
441cd0e1443SSatish Balay 
442cd0e1443SSatish Balay         /* copy over old data into new slots */
443cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4447e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
445a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
446a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
447a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
448cd0e1443SSatish Balay                                                            len*sizeof(int));
449a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
450a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
451a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
452a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
453cd0e1443SSatish Balay         /* free up old matrix storage */
454cd0e1443SSatish Balay         PetscFree(a->a);
455cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
456cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
457cd0e1443SSatish Balay         a->singlemalloc = 1;
458cd0e1443SSatish Balay 
459a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
460cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4617e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
46218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
463cd0e1443SSatish Balay         a->reallocs++;
464119a934aSSatish Balay         a->nz++;
465cd0e1443SSatish Balay       }
4667e67e3f9SSatish Balay       N = nrow++ - 1;
467cd0e1443SSatish Balay       /* shift up all the later entries in this row */
468cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
469cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4707e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
471cd0e1443SSatish Balay       }
4727e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
473cd0e1443SSatish Balay       rp[i]                      = bcol;
4747e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
475f1241b54SBarry Smith       noinsert1:;
476cd0e1443SSatish Balay       low = i;
477cd0e1443SSatish Balay     }
478cd0e1443SSatish Balay     ailen[brow] = nrow;
479cd0e1443SSatish Balay   }
480cd0e1443SSatish Balay   return 0;
481cd0e1443SSatish Balay }
482cd0e1443SSatish Balay 
4835615d1e5SSatish Balay #undef __FUNC__
48405a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
48592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
48692c4ed94SBarry Smith {
48792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4888a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
4890e324ae4SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
4900e324ae4SSatish Balay   int         roworiented=a->roworiented;
4918a84c255SSatish Balay   int         *aj=a->j,nonew=a->nonew;
4920e324ae4SSatish Balay   int          bs2=a->bs2,bs=a->bs,stepval;
4930e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
49492c4ed94SBarry Smith 
4950e324ae4SSatish Balay   if (roworiented) {
4960e324ae4SSatish Balay     stepval = (n-1)*bs;
4970e324ae4SSatish Balay   } else {
4980e324ae4SSatish Balay     stepval = (m-1)*bs;
4990e324ae4SSatish Balay   }
50092c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
50192c4ed94SBarry Smith     row  = im[k];
50292c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50392c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
5048a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
50592c4ed94SBarry Smith #endif
50692c4ed94SBarry Smith     rp   = aj + ai[row];
50792c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
50892c4ed94SBarry Smith     rmax = imax[row];
50992c4ed94SBarry Smith     nrow = ailen[row];
51092c4ed94SBarry Smith     low  = 0;
51192c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
51292c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
51392c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5148a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
51592c4ed94SBarry Smith #endif
51692c4ed94SBarry Smith       col = in[l];
51792c4ed94SBarry Smith       if (roworiented) {
5180e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5190e324ae4SSatish Balay       } else {
5200e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
52192c4ed94SBarry Smith       }
52292c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
52392c4ed94SBarry Smith       while (high-low > 7) {
52492c4ed94SBarry Smith         t = (low+high)/2;
52592c4ed94SBarry Smith         if (rp[t] > col) high = t;
52692c4ed94SBarry Smith         else             low  = t;
52792c4ed94SBarry Smith       }
52892c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
52992c4ed94SBarry Smith         if (rp[i] > col) break;
53092c4ed94SBarry Smith         if (rp[i] == col) {
5318a84c255SSatish Balay           bap  = ap +  bs2*i;
5320e324ae4SSatish Balay           if (roworiented) {
5338a84c255SSatish Balay             if (is == ADD_VALUES) {
5340e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5358a84c255SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5368a84c255SSatish Balay                   bap[jj] += *value++;
5370e324ae4SSatish Balay             } else {
5380e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5390e324ae4SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5400e324ae4SSatish Balay                   bap[jj] = *value++;
5418a84c255SSatish Balay             }
5420e324ae4SSatish Balay           } else {
5430e324ae4SSatish Balay             if (is == ADD_VALUES) {
5440e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5450e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5460e324ae4SSatish Balay                   *bap++ += *value++;
5470e324ae4SSatish Balay             } else {
5480e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5490e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5500e324ae4SSatish Balay                   *bap++  = *value++;
5510e324ae4SSatish Balay             }
5528a84c255SSatish Balay           }
553f1241b54SBarry Smith           goto noinsert2;
55492c4ed94SBarry Smith         }
55592c4ed94SBarry Smith       }
55689280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
55711ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
55892c4ed94SBarry Smith       if (nrow >= rmax) {
55992c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
56092c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
56192c4ed94SBarry Smith         Scalar *new_a;
56292c4ed94SBarry Smith 
56311ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
56489280ab3SLois Curfman McInnes 
56592c4ed94SBarry Smith         /* malloc new storage space */
56692c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
56792c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
56892c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
56992c4ed94SBarry Smith         new_i   = new_j + new_nz;
57092c4ed94SBarry Smith 
57192c4ed94SBarry Smith         /* copy over old data into new slots */
57292c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
57392c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
57492c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
57592c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
57692c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
57792c4ed94SBarry Smith                                                            len*sizeof(int));
57892c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
57992c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
58092c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
58192c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
58292c4ed94SBarry Smith         /* free up old matrix storage */
58392c4ed94SBarry Smith         PetscFree(a->a);
58492c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
58592c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
58692c4ed94SBarry Smith         a->singlemalloc = 1;
58792c4ed94SBarry Smith 
58892c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
58992c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
59092c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
59192c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
59292c4ed94SBarry Smith         a->reallocs++;
59392c4ed94SBarry Smith         a->nz++;
59492c4ed94SBarry Smith       }
59592c4ed94SBarry Smith       N = nrow++ - 1;
59692c4ed94SBarry Smith       /* shift up all the later entries in this row */
59792c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
59892c4ed94SBarry Smith         rp[ii+1] = rp[ii];
59992c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
60092c4ed94SBarry Smith       }
60192c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
60292c4ed94SBarry Smith       rp[i] = col;
6038a84c255SSatish Balay       bap   = ap +  bs2*i;
6040e324ae4SSatish Balay       if (roworiented) {
6050e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval)
6068a84c255SSatish Balay           for (jj=ii; jj<bs2; jj+=bs )
6070e324ae4SSatish Balay             bap[jj] = *value++;
6080e324ae4SSatish Balay       } else {
6090e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval )
6100e324ae4SSatish Balay           for (jj=0; jj<bs; jj++ )
6110e324ae4SSatish Balay             *bap++  = *value++;
6120e324ae4SSatish Balay       }
613f1241b54SBarry Smith       noinsert2:;
61492c4ed94SBarry Smith       low = i;
61592c4ed94SBarry Smith     }
61692c4ed94SBarry Smith     ailen[row] = nrow;
61792c4ed94SBarry Smith   }
61892c4ed94SBarry Smith   return 0;
61992c4ed94SBarry Smith }
62092c4ed94SBarry Smith 
62192c4ed94SBarry Smith #undef __FUNC__
6225eea60f9SBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
6238f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6240b824a48SSatish Balay {
6250b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6260b824a48SSatish Balay   *m = a->m; *n = a->n;
6270b824a48SSatish Balay   return 0;
6280b824a48SSatish Balay }
6290b824a48SSatish Balay 
6305615d1e5SSatish Balay #undef __FUNC__
6315eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
6328f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6330b824a48SSatish Balay {
6340b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6350b824a48SSatish Balay   *m = 0; *n = a->m;
6360b824a48SSatish Balay   return 0;
6370b824a48SSatish Balay }
6380b824a48SSatish Balay 
6395615d1e5SSatish Balay #undef __FUNC__
6405615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6419867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6429867e207SSatish Balay {
6439867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6447e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6459867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6469867e207SSatish Balay 
6479867e207SSatish Balay   bs  = a->bs;
6489867e207SSatish Balay   ai  = a->i;
6499867e207SSatish Balay   aj  = a->j;
6509867e207SSatish Balay   aa  = a->a;
6519df24120SSatish Balay   bs2 = a->bs2;
6529867e207SSatish Balay 
653e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6549867e207SSatish Balay 
6559867e207SSatish Balay   bn  = row/bs;   /* Block number */
6569867e207SSatish Balay   bp  = row % bs; /* Block Position */
6579867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6589867e207SSatish Balay   *nz = bs*M;
6599867e207SSatish Balay 
6609867e207SSatish Balay   if (v) {
6619867e207SSatish Balay     *v = 0;
6629867e207SSatish Balay     if (*nz) {
6639867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6649867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6659867e207SSatish Balay         v_i  = *v + i*bs;
6667e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6677e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6689867e207SSatish Balay       }
6699867e207SSatish Balay     }
6709867e207SSatish Balay   }
6719867e207SSatish Balay 
6729867e207SSatish Balay   if (idx) {
6739867e207SSatish Balay     *idx = 0;
6749867e207SSatish Balay     if (*nz) {
6759867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6769867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6779867e207SSatish Balay         idx_i = *idx + i*bs;
6789867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6799867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6809867e207SSatish Balay       }
6819867e207SSatish Balay     }
6829867e207SSatish Balay   }
6839867e207SSatish Balay   return 0;
6849867e207SSatish Balay }
6859867e207SSatish Balay 
6865615d1e5SSatish Balay #undef __FUNC__
6875eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
6889867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6899867e207SSatish Balay {
6909867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6919867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6929867e207SSatish Balay   return 0;
6939867e207SSatish Balay }
694b6490206SBarry Smith 
6955615d1e5SSatish Balay #undef __FUNC__
6965615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
6978f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
698f2501298SSatish Balay {
699f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
700f2501298SSatish Balay   Mat         C;
701f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
7029df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
703f2501298SSatish Balay   Scalar      *array=a->a;
704f2501298SSatish Balay 
705f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
706e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
707f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
708f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
709a7c10996SSatish Balay 
710a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
711f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
712f2501298SSatish Balay   PetscFree(col);
713f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
714f2501298SSatish Balay   cols = rows + bs;
715f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
716f2501298SSatish Balay     cols[0] = i*bs;
717f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
718f2501298SSatish Balay     len = ai[i+1] - ai[i];
719f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
720f2501298SSatish Balay       rows[0] = (*aj++)*bs;
721f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
722f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
723f2501298SSatish Balay       array += bs2;
724f2501298SSatish Balay     }
725f2501298SSatish Balay   }
7261073c447SSatish Balay   PetscFree(rows);
727f2501298SSatish Balay 
7286d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7296d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
730f2501298SSatish Balay 
731f2501298SSatish Balay   if (B != PETSC_NULL) {
732f2501298SSatish Balay     *B = C;
733f2501298SSatish Balay   } else {
734f2501298SSatish Balay     /* This isn't really an in-place transpose */
735f2501298SSatish Balay     PetscFree(a->a);
736f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
737f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
738f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
739f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
740f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
741f2501298SSatish Balay     PetscFree(a);
742f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
743f2501298SSatish Balay     PetscHeaderDestroy(C);
744f2501298SSatish Balay   }
745f2501298SSatish Balay   return 0;
746f2501298SSatish Balay }
747f2501298SSatish Balay 
748f2501298SSatish Balay 
7495615d1e5SSatish Balay #undef __FUNC__
7505615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7518f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
752584200bdSSatish Balay {
753584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
754584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
755a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
756d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
757584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
758584200bdSSatish Balay 
7596d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
760584200bdSSatish Balay 
761d402145bSBarry Smith   rmax = ailen[0];
762584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
763584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
764584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
765d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
766584200bdSSatish Balay     if (fshift) {
767a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
768584200bdSSatish Balay       N = ailen[i];
769584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
770584200bdSSatish Balay         ip[j-fshift] = ip[j];
7717e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
772584200bdSSatish Balay       }
773584200bdSSatish Balay     }
774584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
775584200bdSSatish Balay   }
776584200bdSSatish Balay   if (mbs) {
777584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
778584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
779584200bdSSatish Balay   }
780584200bdSSatish Balay   /* reset ilen and imax for each row */
781584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
782584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
783584200bdSSatish Balay   }
784a7c10996SSatish Balay   a->nz = ai[mbs];
785584200bdSSatish Balay 
786584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
787584200bdSSatish Balay   if (fshift && a->diag) {
788584200bdSSatish Balay     PetscFree(a->diag);
789584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
790584200bdSSatish Balay     a->diag = 0;
791584200bdSSatish Balay   }
7923d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
793219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7943d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
795584200bdSSatish Balay            a->reallocs);
796d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
797e2f3b5e9SSatish Balay   a->reallocs          = 0;
7984e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7994e220ebcSLois Curfman McInnes 
800584200bdSSatish Balay   return 0;
801584200bdSSatish Balay }
802584200bdSSatish Balay 
8035615d1e5SSatish Balay #undef __FUNC__
8045615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
8058f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
8062593348eSBarry Smith {
807b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8089df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8092593348eSBarry Smith   return 0;
8102593348eSBarry Smith }
8112593348eSBarry Smith 
8125615d1e5SSatish Balay #undef __FUNC__
8135eea60f9SBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
814b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8152593348eSBarry Smith {
8162593348eSBarry Smith   Mat         A  = (Mat) obj;
817b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8182593348eSBarry Smith 
8192593348eSBarry Smith #if defined(PETSC_LOG)
8202593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8212593348eSBarry Smith #endif
8222593348eSBarry Smith   PetscFree(a->a);
8232593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8242593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8252593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8262593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8272593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
828de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8292593348eSBarry Smith   PetscFree(a);
830f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
831f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8322593348eSBarry Smith   return 0;
8332593348eSBarry Smith }
8342593348eSBarry Smith 
8355615d1e5SSatish Balay #undef __FUNC__
8365eea60f9SBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
8378f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8382593348eSBarry Smith {
839b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8406d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8416d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8426d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
843219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8446d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
845c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
84696854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8476d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8486d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
849219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8506d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8516d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
85290f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8532b362799SSatish Balay            op == MAT_IGNORE_OFF_PROC_ENTRIES)
85494a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8556d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
856e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8572593348eSBarry Smith   else
858e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8592593348eSBarry Smith   return 0;
8602593348eSBarry Smith }
8612593348eSBarry Smith 
8622593348eSBarry Smith 
8632593348eSBarry Smith /* -------------------------------------------------------*/
8642593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8652593348eSBarry Smith /* -------------------------------------------------------*/
866b6490206SBarry Smith #include "pinclude/plapack.h"
867b6490206SBarry Smith 
8685615d1e5SSatish Balay #undef __FUNC__
8695615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
87039b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8712593348eSBarry Smith {
872b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
87339b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
874c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8752593348eSBarry Smith 
876c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
877c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
878b6490206SBarry Smith 
879119a934aSSatish Balay   idx   = a->j;
880119a934aSSatish Balay   v     = a->a;
881119a934aSSatish Balay   ii    = a->i;
882119a934aSSatish Balay 
883119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
884119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
885119a934aSSatish Balay     sum  = 0.0;
886119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8871a6a6d98SBarry Smith     z[i] = sum;
888119a934aSSatish Balay   }
889c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
890c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
89139b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
89239b95ed1SBarry Smith   return 0;
89339b95ed1SBarry Smith }
89439b95ed1SBarry Smith 
8955615d1e5SSatish Balay #undef __FUNC__
8965615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
89739b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
89839b95ed1SBarry Smith {
89939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
90039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
90139b95ed1SBarry Smith   register Scalar x1,x2;
90239b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
903c16cb8f2SBarry Smith   int             j,n;
90439b95ed1SBarry Smith 
905c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
906c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
90739b95ed1SBarry Smith 
90839b95ed1SBarry Smith   idx   = a->j;
90939b95ed1SBarry Smith   v     = a->a;
91039b95ed1SBarry Smith   ii    = a->i;
91139b95ed1SBarry Smith 
912119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
913119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
914119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
915119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
916119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
917119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
918119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
919119a934aSSatish Balay       v += 4;
920119a934aSSatish Balay     }
9211a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
922119a934aSSatish Balay     z += 2;
923119a934aSSatish Balay   }
924c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
925c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
92639b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
92739b95ed1SBarry Smith   return 0;
92839b95ed1SBarry Smith }
92939b95ed1SBarry Smith 
9305615d1e5SSatish Balay #undef __FUNC__
9315615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
93239b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
93339b95ed1SBarry Smith {
93439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
93539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
936c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
93739b95ed1SBarry Smith 
938c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
939c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
94039b95ed1SBarry Smith 
94139b95ed1SBarry Smith   idx   = a->j;
94239b95ed1SBarry Smith   v     = a->a;
94339b95ed1SBarry Smith   ii    = a->i;
94439b95ed1SBarry Smith 
945119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
946119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
947119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
948119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
949119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
950119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
951119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
952119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
953119a934aSSatish Balay       v += 9;
954119a934aSSatish Balay     }
9551a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
956119a934aSSatish Balay     z += 3;
957119a934aSSatish Balay   }
958c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
959c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
96039b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
96139b95ed1SBarry Smith   return 0;
96239b95ed1SBarry Smith }
96339b95ed1SBarry Smith 
9645615d1e5SSatish Balay #undef __FUNC__
9655615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
96639b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
96739b95ed1SBarry Smith {
96839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
96939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
97039b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
97139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
972c16cb8f2SBarry Smith   int             j,n;
97339b95ed1SBarry Smith 
974c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
975c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
97639b95ed1SBarry Smith 
97739b95ed1SBarry Smith   idx   = a->j;
97839b95ed1SBarry Smith   v     = a->a;
97939b95ed1SBarry Smith   ii    = a->i;
98039b95ed1SBarry Smith 
981119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
982119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
983119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
984119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
985119a934aSSatish Balay       xb = x + 4*(*idx++);
986119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
987119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
988119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
989119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
990119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
991119a934aSSatish Balay       v += 16;
992119a934aSSatish Balay     }
9931a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
994119a934aSSatish Balay     z += 4;
995119a934aSSatish Balay   }
996c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
997c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
99839b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
99939b95ed1SBarry Smith   return 0;
100039b95ed1SBarry Smith }
100139b95ed1SBarry Smith 
10025615d1e5SSatish Balay #undef __FUNC__
10035615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
100439b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
100539b95ed1SBarry Smith {
100639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
100739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
100839b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
1009c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
101039b95ed1SBarry Smith 
1011c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1012c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
101339b95ed1SBarry Smith 
101439b95ed1SBarry Smith   idx   = a->j;
101539b95ed1SBarry Smith   v     = a->a;
101639b95ed1SBarry Smith   ii    = a->i;
101739b95ed1SBarry Smith 
1018119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1019119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1020119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1021119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1022119a934aSSatish Balay       xb = x + 5*(*idx++);
1023119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1024119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1025119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1026119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1027119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1028119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1029119a934aSSatish Balay       v += 25;
1030119a934aSSatish Balay     }
10311a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1032119a934aSSatish Balay     z += 5;
1033119a934aSSatish Balay   }
1034c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1035c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
103639b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
103739b95ed1SBarry Smith   return 0;
103839b95ed1SBarry Smith }
103939b95ed1SBarry Smith 
10405615d1e5SSatish Balay #undef __FUNC__
104148e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
104248e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
104348e9ddb2SSatish Balay {
104448e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
104548e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
104648e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
104748e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
104848e9ddb2SSatish Balay 
104948e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
105048e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
105148e9ddb2SSatish Balay 
105248e9ddb2SSatish Balay   idx   = a->j;
105348e9ddb2SSatish Balay   v     = a->a;
105448e9ddb2SSatish Balay   ii    = a->i;
105548e9ddb2SSatish Balay 
105648e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
105748e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
105848e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
105948e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
106048e9ddb2SSatish Balay       xb = x + 7*(*idx++);
106148e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
106248e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
106348e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
106448e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
106548e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
106648e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
106748e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
106848e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
106948e9ddb2SSatish Balay       v += 49;
107048e9ddb2SSatish Balay     }
107148e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
107248e9ddb2SSatish Balay     z += 7;
107348e9ddb2SSatish Balay   }
107448e9ddb2SSatish Balay 
107548e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
107648e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
107748e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
107848e9ddb2SSatish Balay   return 0;
107948e9ddb2SSatish Balay }
108048e9ddb2SSatish Balay 
108148e9ddb2SSatish Balay #undef __FUNC__
10825615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
108339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
108439b95ed1SBarry Smith {
108539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
108639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1087c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1088c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1089c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
109039b95ed1SBarry Smith 
1091c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1092c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
109339b95ed1SBarry Smith 
109439b95ed1SBarry Smith   idx   = a->j;
109539b95ed1SBarry Smith   v     = a->a;
109639b95ed1SBarry Smith   ii    = a->i;
109739b95ed1SBarry Smith 
109839b95ed1SBarry Smith 
1099119a934aSSatish Balay   if (!a->mult_work) {
11003b547af2SSatish Balay     k = PetscMax(a->m,a->n);
11012b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1102119a934aSSatish Balay   }
1103119a934aSSatish Balay   work = a->mult_work;
1104119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1105119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1106119a934aSSatish Balay     ncols = n*bs;
1107119a934aSSatish Balay     workt = work;
1108119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1109119a934aSSatish Balay       xb = x + bs*(*idx++);
1110119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1111119a934aSSatish Balay       workt += bs;
1112119a934aSSatish Balay     }
11131a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1114119a934aSSatish Balay     v += n*bs2;
1115119a934aSSatish Balay     z += bs;
1116119a934aSSatish Balay   }
1117c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1118c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11191a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1120bb42667fSSatish Balay   return 0;
1121bb42667fSSatish Balay }
1122bb42667fSSatish Balay 
11235615d1e5SSatish Balay #undef __FUNC__
11245615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1125f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1126f44d3a6dSSatish Balay {
1127f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1128f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1129c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1130f44d3a6dSSatish Balay 
1131c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1132c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1133c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1134f44d3a6dSSatish Balay 
1135f44d3a6dSSatish Balay   idx   = a->j;
1136f44d3a6dSSatish Balay   v     = a->a;
1137f44d3a6dSSatish Balay   ii    = a->i;
1138f44d3a6dSSatish Balay 
1139f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1140f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1141f44d3a6dSSatish Balay     sum  = y[i];
1142f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1143f44d3a6dSSatish Balay     z[i] = sum;
1144f44d3a6dSSatish Balay   }
1145c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1146c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1147c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1148f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1149f44d3a6dSSatish Balay   return 0;
1150f44d3a6dSSatish Balay }
1151f44d3a6dSSatish Balay 
11525615d1e5SSatish Balay #undef __FUNC__
11535615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1154f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1155f44d3a6dSSatish Balay {
1156f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1157f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1158f44d3a6dSSatish Balay   register Scalar x1,x2;
1159f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1160c16cb8f2SBarry Smith   int             j,n;
1161f44d3a6dSSatish Balay 
1162c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1163c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1164c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1165f44d3a6dSSatish Balay 
1166f44d3a6dSSatish Balay   idx   = a->j;
1167f44d3a6dSSatish Balay   v     = a->a;
1168f44d3a6dSSatish Balay   ii    = a->i;
1169f44d3a6dSSatish Balay 
1170f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1171f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1172f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1173f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1174f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1175f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1176f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1177f44d3a6dSSatish Balay       v += 4;
1178f44d3a6dSSatish Balay     }
1179f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1180f44d3a6dSSatish Balay     z += 2; y += 2;
1181f44d3a6dSSatish Balay   }
1182c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1183c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1184c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1185f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1186f44d3a6dSSatish Balay   return 0;
1187f44d3a6dSSatish Balay }
1188f44d3a6dSSatish Balay 
11895615d1e5SSatish Balay #undef __FUNC__
11905615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1191f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1192f44d3a6dSSatish Balay {
1193f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1194f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1195c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1196f44d3a6dSSatish Balay 
1197c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1198c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1199c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1200f44d3a6dSSatish Balay 
1201f44d3a6dSSatish Balay   idx   = a->j;
1202f44d3a6dSSatish Balay   v     = a->a;
1203f44d3a6dSSatish Balay   ii    = a->i;
1204f44d3a6dSSatish Balay 
1205f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1206f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1207f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1208f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1209f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1210f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1211f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1212f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1213f44d3a6dSSatish Balay       v += 9;
1214f44d3a6dSSatish Balay     }
1215f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1216f44d3a6dSSatish Balay     z += 3; y += 3;
1217f44d3a6dSSatish Balay   }
1218c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1219c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1220c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1221f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1222f44d3a6dSSatish Balay   return 0;
1223f44d3a6dSSatish Balay }
1224f44d3a6dSSatish Balay 
12255615d1e5SSatish Balay #undef __FUNC__
12265615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1227f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1228f44d3a6dSSatish Balay {
1229f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1230f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1231f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1232f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1233c16cb8f2SBarry Smith   int             j,n;
1234f44d3a6dSSatish Balay 
1235c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1236c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1237c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1238f44d3a6dSSatish Balay 
1239f44d3a6dSSatish Balay   idx   = a->j;
1240f44d3a6dSSatish Balay   v     = a->a;
1241f44d3a6dSSatish Balay   ii    = a->i;
1242f44d3a6dSSatish Balay 
1243f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1244f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1245f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1246f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1247f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1248f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1249f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1250f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1251f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1252f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1253f44d3a6dSSatish Balay       v += 16;
1254f44d3a6dSSatish Balay     }
1255f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1256f44d3a6dSSatish Balay     z += 4; y += 4;
1257f44d3a6dSSatish Balay   }
1258c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1259c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1260c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1261f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1262f44d3a6dSSatish Balay   return 0;
1263f44d3a6dSSatish Balay }
1264f44d3a6dSSatish Balay 
12655615d1e5SSatish Balay #undef __FUNC__
12665615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1267f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1268f44d3a6dSSatish Balay {
1269f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1270f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1271f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1272c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1273f44d3a6dSSatish Balay 
1274c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1275c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1276c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1277f44d3a6dSSatish Balay 
1278f44d3a6dSSatish Balay   idx   = a->j;
1279f44d3a6dSSatish Balay   v     = a->a;
1280f44d3a6dSSatish Balay   ii    = a->i;
1281f44d3a6dSSatish Balay 
1282f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1283f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1284f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1285f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1286f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1287f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1288f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1289f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1290f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1291f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1292f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1293f44d3a6dSSatish Balay       v += 25;
1294f44d3a6dSSatish Balay     }
1295f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1296f44d3a6dSSatish Balay     z += 5; y += 5;
1297f44d3a6dSSatish Balay   }
1298c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1299c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1300c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1301f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1302f44d3a6dSSatish Balay   return 0;
1303f44d3a6dSSatish Balay }
1304f44d3a6dSSatish Balay 
13055615d1e5SSatish Balay #undef __FUNC__
130648e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
130748e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
130848e9ddb2SSatish Balay {
130948e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
131048e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
131148e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
131248e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
131348e9ddb2SSatish Balay 
131448e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
131548e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
131648e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
131748e9ddb2SSatish Balay 
131848e9ddb2SSatish Balay   idx   = a->j;
131948e9ddb2SSatish Balay   v     = a->a;
132048e9ddb2SSatish Balay   ii    = a->i;
132148e9ddb2SSatish Balay 
132248e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
132348e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
132448e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
132548e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
132648e9ddb2SSatish Balay       xb = x + 7*(*idx++);
132748e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
132848e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
132948e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
133048e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
133148e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
133248e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
133348e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
133448e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
133548e9ddb2SSatish Balay       v += 49;
133648e9ddb2SSatish Balay     }
133748e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
133848e9ddb2SSatish Balay     z += 7; y += 7;
133948e9ddb2SSatish Balay   }
134048e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
134148e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
134248e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
134348e9ddb2SSatish Balay   PLogFlops(98*a->nz);
134448e9ddb2SSatish Balay   return 0;
134548e9ddb2SSatish Balay }
134648e9ddb2SSatish Balay 
134748e9ddb2SSatish Balay 
134848e9ddb2SSatish Balay #undef __FUNC__
13495615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1350f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1351f44d3a6dSSatish Balay {
1352f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1353f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1354f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1355f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1356f44d3a6dSSatish Balay 
1357f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1358f44d3a6dSSatish Balay 
1359c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1360c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1361f44d3a6dSSatish Balay 
1362f44d3a6dSSatish Balay   idx   = a->j;
1363f44d3a6dSSatish Balay   v     = a->a;
1364f44d3a6dSSatish Balay   ii    = a->i;
1365f44d3a6dSSatish Balay 
1366f44d3a6dSSatish Balay 
1367f44d3a6dSSatish Balay   if (!a->mult_work) {
1368f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1369f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1370f44d3a6dSSatish Balay   }
1371f44d3a6dSSatish Balay   work = a->mult_work;
1372f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1373f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1374f44d3a6dSSatish Balay     ncols = n*bs;
1375f44d3a6dSSatish Balay     workt = work;
1376f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1377f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1378f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1379f44d3a6dSSatish Balay       workt += bs;
1380f44d3a6dSSatish Balay     }
1381f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1382f44d3a6dSSatish Balay     v += n*bs2;
1383f44d3a6dSSatish Balay     z += bs;
1384f44d3a6dSSatish Balay   }
1385c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1386c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1387f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1388f44d3a6dSSatish Balay   return 0;
1389f44d3a6dSSatish Balay }
1390f44d3a6dSSatish Balay 
13915615d1e5SSatish Balay #undef __FUNC__
13925615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
13938f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1394bb42667fSSatish Balay {
1395bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13961a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1397bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1398bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
13999df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1400119a934aSSatish Balay 
1401119a934aSSatish Balay 
140290f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
140390f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1404bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1405bb42667fSSatish Balay 
1406119a934aSSatish Balay   idx   = a->j;
1407119a934aSSatish Balay   v     = a->a;
1408119a934aSSatish Balay   ii    = a->i;
1409119a934aSSatish Balay 
1410119a934aSSatish Balay   switch (bs) {
1411119a934aSSatish Balay   case 1:
1412119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1413119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1414119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1415119a934aSSatish Balay       ib = idx + ai[i];
1416119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1417bb1453f0SSatish Balay         rval    = ib[j];
1418bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1419119a934aSSatish Balay       }
1420119a934aSSatish Balay     }
1421119a934aSSatish Balay     break;
1422119a934aSSatish Balay   case 2:
1423119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1424119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1425119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1426119a934aSSatish Balay       ib = idx + ai[i];
1427119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1428119a934aSSatish Balay         rval      = ib[j]*2;
1429bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1430bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1431119a934aSSatish Balay         v += 4;
1432119a934aSSatish Balay       }
1433119a934aSSatish Balay     }
1434119a934aSSatish Balay     break;
1435119a934aSSatish Balay   case 3:
1436119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1437119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1438119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1439119a934aSSatish Balay       ib = idx + ai[i];
1440119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1441119a934aSSatish Balay         rval      = ib[j]*3;
1442bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1443bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1444bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1445119a934aSSatish Balay         v += 9;
1446119a934aSSatish Balay       }
1447119a934aSSatish Balay     }
1448119a934aSSatish Balay     break;
1449119a934aSSatish Balay   case 4:
1450119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1451119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1452119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1453119a934aSSatish Balay       ib = idx + ai[i];
1454119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1455119a934aSSatish Balay         rval      = ib[j]*4;
1456bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1457bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1458bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1459bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1460119a934aSSatish Balay         v += 16;
1461119a934aSSatish Balay       }
1462119a934aSSatish Balay     }
1463119a934aSSatish Balay     break;
1464119a934aSSatish Balay   case 5:
1465119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1466119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1467119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1468119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1469119a934aSSatish Balay       ib = idx + ai[i];
1470119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1471119a934aSSatish Balay         rval      = ib[j]*5;
1472bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1473bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1474bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1475bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1476bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1477119a934aSSatish Balay         v += 25;
1478119a934aSSatish Balay       }
1479119a934aSSatish Balay     }
1480119a934aSSatish Balay     break;
1481119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1482119a934aSSatish Balay     default: {
1483119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1484119a934aSSatish Balay       if (!a->mult_work) {
14853b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1486bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1487119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1488119a934aSSatish Balay       }
1489119a934aSSatish Balay       work = a->mult_work;
1490119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1491119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1492119a934aSSatish Balay         ncols = n*bs;
1493119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1494119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1495119a934aSSatish Balay         v += n*bs2;
1496119a934aSSatish Balay         x += bs;
1497119a934aSSatish Balay         workt = work;
1498119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1499119a934aSSatish Balay           zb = z + bs*(*idx++);
1500bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1501119a934aSSatish Balay           workt += bs;
1502119a934aSSatish Balay         }
1503119a934aSSatish Balay       }
1504119a934aSSatish Balay     }
1505119a934aSSatish Balay   }
15060419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15070419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1508faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1509faf6580fSSatish Balay   return 0;
1510faf6580fSSatish Balay }
15111c351548SSatish Balay 
15125615d1e5SSatish Balay #undef __FUNC__
15135615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15148f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1515faf6580fSSatish Balay {
1516faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1517faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1518faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1519faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1520faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1521faf6580fSSatish Balay 
1522faf6580fSSatish Balay 
1523faf6580fSSatish Balay 
152490f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
152590f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1526faf6580fSSatish Balay 
1527648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1528648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1529648c1d95SSatish Balay 
1530faf6580fSSatish Balay 
1531faf6580fSSatish Balay   idx   = a->j;
1532faf6580fSSatish Balay   v     = a->a;
1533faf6580fSSatish Balay   ii    = a->i;
1534faf6580fSSatish Balay 
1535faf6580fSSatish Balay   switch (bs) {
1536faf6580fSSatish Balay   case 1:
1537faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1538faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1539faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1540faf6580fSSatish Balay       ib = idx + ai[i];
1541faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1542faf6580fSSatish Balay         rval    = ib[j];
1543faf6580fSSatish Balay         z[rval] += *v++ * x1;
1544faf6580fSSatish Balay       }
1545faf6580fSSatish Balay     }
1546faf6580fSSatish Balay     break;
1547faf6580fSSatish Balay   case 2:
1548faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1549faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1550faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1551faf6580fSSatish Balay       ib = idx + ai[i];
1552faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1553faf6580fSSatish Balay         rval      = ib[j]*2;
1554faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1555faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1556faf6580fSSatish Balay         v += 4;
1557faf6580fSSatish Balay       }
1558faf6580fSSatish Balay     }
1559faf6580fSSatish Balay     break;
1560faf6580fSSatish Balay   case 3:
1561faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1562faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1563faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1564faf6580fSSatish Balay       ib = idx + ai[i];
1565faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1566faf6580fSSatish Balay         rval      = ib[j]*3;
1567faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1568faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1569faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1570faf6580fSSatish Balay         v += 9;
1571faf6580fSSatish Balay       }
1572faf6580fSSatish Balay     }
1573faf6580fSSatish Balay     break;
1574faf6580fSSatish Balay   case 4:
1575faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1576faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1577faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1578faf6580fSSatish Balay       ib = idx + ai[i];
1579faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1580faf6580fSSatish Balay         rval      = ib[j]*4;
1581faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1582faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1583faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1584faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1585faf6580fSSatish Balay         v += 16;
1586faf6580fSSatish Balay       }
1587faf6580fSSatish Balay     }
1588faf6580fSSatish Balay     break;
1589faf6580fSSatish Balay   case 5:
1590faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1591faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1592faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1593faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1594faf6580fSSatish Balay       ib = idx + ai[i];
1595faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1596faf6580fSSatish Balay         rval      = ib[j]*5;
1597faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1598faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1599faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1600faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1601faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1602faf6580fSSatish Balay         v += 25;
1603faf6580fSSatish Balay       }
1604faf6580fSSatish Balay     }
1605faf6580fSSatish Balay     break;
1606faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1607faf6580fSSatish Balay     default: {
1608faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1609faf6580fSSatish Balay       if (!a->mult_work) {
1610faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1611faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1612faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1613faf6580fSSatish Balay       }
1614faf6580fSSatish Balay       work = a->mult_work;
1615faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1616faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1617faf6580fSSatish Balay         ncols = n*bs;
1618faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1619faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1620faf6580fSSatish Balay         v += n*bs2;
1621faf6580fSSatish Balay         x += bs;
1622faf6580fSSatish Balay         workt = work;
1623faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1624faf6580fSSatish Balay           zb = z + bs*(*idx++);
1625faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1626faf6580fSSatish Balay           workt += bs;
1627faf6580fSSatish Balay         }
1628faf6580fSSatish Balay       }
1629faf6580fSSatish Balay     }
1630faf6580fSSatish Balay   }
1631faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1632faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1633faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16342593348eSBarry Smith   return 0;
16352593348eSBarry Smith }
16362593348eSBarry Smith 
16375615d1e5SSatish Balay #undef __FUNC__
16385eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
16398f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16402593348eSBarry Smith {
1641b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16424e220ebcSLois Curfman McInnes 
16434e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16444e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16454e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16464e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16474e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16484e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16494e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16504e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16514e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16524e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16534e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16544e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16554e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16564e220ebcSLois Curfman McInnes   if (A->factor) {
16574e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16584e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16594e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16604e220ebcSLois Curfman McInnes   } else {
16614e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16624e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16634e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16644e220ebcSLois Curfman McInnes   }
16652593348eSBarry Smith   return 0;
16662593348eSBarry Smith }
16672593348eSBarry Smith 
16685615d1e5SSatish Balay #undef __FUNC__
16695615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
16708f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
167191d316f6SSatish Balay {
167291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
167391d316f6SSatish Balay 
1674e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
167591d316f6SSatish Balay 
167691d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
167791d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1678a7c10996SSatish Balay       (a->nz != b->nz)) {
167991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168091d316f6SSatish Balay   }
168191d316f6SSatish Balay 
168291d316f6SSatish Balay   /* if the a->i are the same */
168391d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
168491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168591d316f6SSatish Balay   }
168691d316f6SSatish Balay 
168791d316f6SSatish Balay   /* if a->j are the same */
168891d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
168991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169091d316f6SSatish Balay   }
169191d316f6SSatish Balay 
169291d316f6SSatish Balay   /* if a->a are the same */
169391d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
169491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169591d316f6SSatish Balay   }
169691d316f6SSatish Balay   *flg = PETSC_TRUE;
169791d316f6SSatish Balay   return 0;
169891d316f6SSatish Balay 
169991d316f6SSatish Balay }
170091d316f6SSatish Balay 
17015615d1e5SSatish Balay #undef __FUNC__
17025615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17038f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
170491d316f6SSatish Balay {
170591d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17067e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
170717e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
170817e48fc4SSatish Balay 
17090513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
171017e48fc4SSatish Balay   bs   = a->bs;
171117e48fc4SSatish Balay   aa   = a->a;
171217e48fc4SSatish Balay   ai   = a->i;
171317e48fc4SSatish Balay   aj   = a->j;
171417e48fc4SSatish Balay   ambs = a->mbs;
17159df24120SSatish Balay   bs2  = a->bs2;
171691d316f6SSatish Balay 
171791d316f6SSatish Balay   VecSet(&zero,v);
171890f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1719e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
172017e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
172117e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
172217e48fc4SSatish Balay       if (aj[j] == i) {
172317e48fc4SSatish Balay         row  = i*bs;
17247e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17257e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
172691d316f6SSatish Balay         break;
172791d316f6SSatish Balay       }
172891d316f6SSatish Balay     }
172991d316f6SSatish Balay   }
173091d316f6SSatish Balay   return 0;
173191d316f6SSatish Balay }
173291d316f6SSatish Balay 
17335615d1e5SSatish Balay #undef __FUNC__
17345615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17358f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17369867e207SSatish Balay {
17379867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17389867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17397e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17409867e207SSatish Balay 
17419867e207SSatish Balay   ai  = a->i;
17429867e207SSatish Balay   aj  = a->j;
17439867e207SSatish Balay   aa  = a->a;
17449867e207SSatish Balay   m   = a->m;
17459867e207SSatish Balay   n   = a->n;
17469867e207SSatish Balay   bs  = a->bs;
17479867e207SSatish Balay   mbs = a->mbs;
17489df24120SSatish Balay   bs2 = a->bs2;
17499867e207SSatish Balay   if (ll) {
175090f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1751e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17529867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17539867e207SSatish Balay       M  = ai[i+1] - ai[i];
17549867e207SSatish Balay       li = l + i*bs;
17557e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17569867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17577e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17589867e207SSatish Balay           (*v++) *= li[k%bs];
17599867e207SSatish Balay         }
17609867e207SSatish Balay       }
17619867e207SSatish Balay     }
17629867e207SSatish Balay   }
17639867e207SSatish Balay 
17649867e207SSatish Balay   if (rr) {
176590f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1766e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17679867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17689867e207SSatish Balay       M  = ai[i+1] - ai[i];
17697e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17709867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17719867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17729867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17739867e207SSatish Balay           x = ri[k];
17749867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17759867e207SSatish Balay         }
17769867e207SSatish Balay       }
17779867e207SSatish Balay     }
17789867e207SSatish Balay   }
17799867e207SSatish Balay   return 0;
17809867e207SSatish Balay }
17819867e207SSatish Balay 
17829867e207SSatish Balay 
1783b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1784b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17852a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1786736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1787736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17881a6a6d98SBarry Smith 
17891a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17901a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17911a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
17921a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
17931a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
17941a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
179548e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
17961a6a6d98SBarry Smith 
17977fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
17987fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
17997fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
18007fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
18017fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
18027fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18032593348eSBarry Smith 
18045615d1e5SSatish Balay #undef __FUNC__
18055615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18068f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18072593348eSBarry Smith {
1808b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18092593348eSBarry Smith   Scalar      *v = a->a;
18102593348eSBarry Smith   double      sum = 0.0;
18119df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18122593348eSBarry Smith 
18132593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18140419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18152593348eSBarry Smith #if defined(PETSC_COMPLEX)
18162593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18172593348eSBarry Smith #else
18182593348eSBarry Smith       sum += (*v)*(*v); v++;
18192593348eSBarry Smith #endif
18202593348eSBarry Smith     }
18212593348eSBarry Smith     *norm = sqrt(sum);
18222593348eSBarry Smith   }
18232593348eSBarry Smith   else {
1824e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18252593348eSBarry Smith   }
18262593348eSBarry Smith   return 0;
18272593348eSBarry Smith }
18282593348eSBarry Smith 
1829*3eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
18302593348eSBarry Smith /*
18312593348eSBarry Smith      note: This can only work for identity for row and col. It would
18322593348eSBarry Smith    be good to check this and otherwise generate an error.
18332593348eSBarry Smith */
18345615d1e5SSatish Balay #undef __FUNC__
18355615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18368f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18372593348eSBarry Smith {
1838b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18392593348eSBarry Smith   Mat         outA;
1840de6a44a3SBarry Smith   int         ierr;
18412593348eSBarry Smith 
1842e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18432593348eSBarry Smith 
18442593348eSBarry Smith   outA          = inA;
18452593348eSBarry Smith   inA->factor   = FACTOR_LU;
18462593348eSBarry Smith   a->row        = row;
18472593348eSBarry Smith   a->col        = col;
18482593348eSBarry Smith 
1849eed86810SBarry Smith   if (!a->solve_work) {
1850de6a44a3SBarry Smith     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1851eed86810SBarry Smith     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1852eed86810SBarry Smith   }
18532593348eSBarry Smith 
18542593348eSBarry Smith   if (!a->diag) {
1855b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18562593348eSBarry Smith   }
18577fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1858*3eee16b0SBarry Smith 
1859*3eee16b0SBarry Smith   /*
1860*3eee16b0SBarry Smith       Blocksize 4 has a special faster solver for ILU(0) factorization
1861*3eee16b0SBarry Smith     with natural ordering
1862*3eee16b0SBarry Smith   */
1863*3eee16b0SBarry Smith   if (a->bs == 4) {
1864*3eee16b0SBarry Smith     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
1865*3eee16b0SBarry Smith   }
1866*3eee16b0SBarry Smith 
18672593348eSBarry Smith   return 0;
18682593348eSBarry Smith }
18692593348eSBarry Smith 
18705615d1e5SSatish Balay #undef __FUNC__
18715615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
18728f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18732593348eSBarry Smith {
1874b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18759df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1876b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1877b6490206SBarry Smith   PLogFlops(totalnz);
18782593348eSBarry Smith   return 0;
18792593348eSBarry Smith }
18802593348eSBarry Smith 
18815615d1e5SSatish Balay #undef __FUNC__
18825615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18838f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18847e67e3f9SSatish Balay {
18857e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18867e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1887a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18889df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18897e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18907e67e3f9SSatish Balay 
18917e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
18927e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1893e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1894e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1895a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
18967e67e3f9SSatish Balay     nrow = ailen[brow];
18977e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1898e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1899e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1900a7c10996SSatish Balay       col  = in[l] ;
19017e67e3f9SSatish Balay       bcol = col/bs;
19027e67e3f9SSatish Balay       cidx = col%bs;
19037e67e3f9SSatish Balay       ridx = row%bs;
19047e67e3f9SSatish Balay       high = nrow;
19057e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
19067e67e3f9SSatish Balay       while (high-low > 5) {
19077e67e3f9SSatish Balay         t = (low+high)/2;
19087e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
19097e67e3f9SSatish Balay         else             low  = t;
19107e67e3f9SSatish Balay       }
19117e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
19127e67e3f9SSatish Balay         if (rp[i] > bcol) break;
19137e67e3f9SSatish Balay         if (rp[i] == bcol) {
19147e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
19157e67e3f9SSatish Balay           goto finished;
19167e67e3f9SSatish Balay         }
19177e67e3f9SSatish Balay       }
19187e67e3f9SSatish Balay       *v++ = zero;
19197e67e3f9SSatish Balay       finished:;
19207e67e3f9SSatish Balay     }
19217e67e3f9SSatish Balay   }
19227e67e3f9SSatish Balay   return 0;
19237e67e3f9SSatish Balay }
19247e67e3f9SSatish Balay 
19255615d1e5SSatish Balay #undef __FUNC__
19265eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
19278f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19285a838052SSatish Balay {
19295a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19305a838052SSatish Balay   *bs = baij->bs;
19315a838052SSatish Balay   return 0;
19325a838052SSatish Balay }
19335a838052SSatish Balay 
1934d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19355615d1e5SSatish Balay #undef __FUNC__
19365615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1937d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1938d9b7c43dSSatish Balay {
1939d9b7c43dSSatish Balay   int i,row;
1940d9b7c43dSSatish Balay   row = idx[0];
1941d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1942d9b7c43dSSatish Balay 
1943d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1944d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1945d9b7c43dSSatish Balay   }
1946d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1947d9b7c43dSSatish Balay   return 0;
1948d9b7c43dSSatish Balay }
1949d9b7c43dSSatish Balay 
19505615d1e5SSatish Balay #undef __FUNC__
19515615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19528f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1953d9b7c43dSSatish Balay {
1954d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1955d9b7c43dSSatish Balay   IS          is_local;
1956d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1957d9b7c43dSSatish Balay   PetscTruth  flg;
1958d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1959d9b7c43dSSatish Balay 
1960d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1961d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1962d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1963537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1964d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1965d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1966d9b7c43dSSatish Balay 
1967d9b7c43dSSatish Balay   i = 0;
1968d9b7c43dSSatish Balay   while (i < is_n) {
1969e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1970d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1971d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1972d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1973d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1974d9b7c43dSSatish Balay       i += bs;
1975d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1976d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1977d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1978d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1979d9b7c43dSSatish Balay         aa[0] = zero;
1980d9b7c43dSSatish Balay         aa+=bs;
1981d9b7c43dSSatish Balay       }
1982d9b7c43dSSatish Balay       i++;
1983d9b7c43dSSatish Balay     }
1984d9b7c43dSSatish Balay   }
1985d9b7c43dSSatish Balay   if (diag) {
1986d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1987d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1988d9b7c43dSSatish Balay     }
1989d9b7c43dSSatish Balay   }
1990d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1991d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1992d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
19939a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1994d9b7c43dSSatish Balay 
1995d9b7c43dSSatish Balay   return 0;
1996d9b7c43dSSatish Balay }
19971c351548SSatish Balay 
19985615d1e5SSatish Balay #undef __FUNC__
19995eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
2000ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
2001ba4ca20aSSatish Balay {
2002ba4ca20aSSatish Balay   static int called = 0;
2003ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
2004ba4ca20aSSatish Balay 
2005ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
2006ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2007ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2008ba4ca20aSSatish Balay   return 0;
2009ba4ca20aSSatish Balay }
2010d9b7c43dSSatish Balay 
20112593348eSBarry Smith /* -------------------------------------------------------------------*/
2012cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
20139867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2014f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2015faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20161a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2017b6490206SBarry Smith        0,0,
2018de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2019b6490206SBarry Smith        0,
2020f2501298SSatish Balay        MatTranspose_SeqBAIJ,
202117e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20229867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2023584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2024b6490206SBarry Smith        0,
2025d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20267fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2027b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2028de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2029d402145bSBarry Smith        0,0,
2030b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2031b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2032af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20337e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2034ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20353b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20363b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
203792c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
203892c4ed94SBarry Smith        0,0,0,0,0,0,
203992c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20402593348eSBarry Smith 
20415615d1e5SSatish Balay #undef __FUNC__
20425615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20432593348eSBarry Smith /*@C
204444cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
204544cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
204644cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20472bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20482bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20492593348eSBarry Smith 
20502593348eSBarry Smith    Input Parameters:
2051029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2052b6490206SBarry Smith .  bs - size of block
20532593348eSBarry Smith .  m - number of rows
20542593348eSBarry Smith .  n - number of columns
2055b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20562bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20572bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20582593348eSBarry Smith 
20592593348eSBarry Smith    Output Parameter:
20602593348eSBarry Smith .  A - the matrix
20612593348eSBarry Smith 
20620513a670SBarry Smith    Options Database Keys:
20630513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20640513a670SBarry Smith $                     block calculations (much solver)
20650513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20660513a670SBarry Smith 
20672593348eSBarry Smith    Notes:
206844cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20692593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
207044cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20712593348eSBarry Smith 
20722593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20732593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20742593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20756da5968aSLois Curfman McInnes    matrices.
20762593348eSBarry Smith 
207744cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20782593348eSBarry Smith @*/
2079b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20802593348eSBarry Smith {
20812593348eSBarry Smith   Mat         B;
2082b6490206SBarry Smith   Mat_SeqBAIJ *b;
20833b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20843b2fbd54SBarry Smith 
20853b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2086e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2087b6490206SBarry Smith 
20880513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20890513a670SBarry Smith 
2090f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2091e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
20922593348eSBarry Smith 
20932593348eSBarry Smith   *A = 0;
2094f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
20952593348eSBarry Smith   PLogObjectCreate(B);
2096b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
209744cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
20982593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
20991a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
21001a6a6d98SBarry Smith   if (!flg) {
21017fc0212eSBarry Smith     switch (bs) {
21027fc0212eSBarry Smith     case 1:
21037fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21041a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
210539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2106f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
21077fc0212eSBarry Smith       break;
21084eeb42bcSBarry Smith     case 2:
21094eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
21101a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
211139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2112f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
21136d84be18SBarry Smith       break;
2114f327dd97SBarry Smith     case 3:
2115f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21161a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
211739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2118f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21194eeb42bcSBarry Smith       break;
21206d84be18SBarry Smith     case 4:
21216d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21221a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
212339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2124f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21256d84be18SBarry Smith       break;
21266d84be18SBarry Smith     case 5:
21276d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21281a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
212939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2130f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21316d84be18SBarry Smith       break;
213248e9ddb2SSatish Balay     case 7:
213348e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
213448e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
213548e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
213648e9ddb2SSatish Balay       break;
21377fc0212eSBarry Smith     }
21381a6a6d98SBarry Smith   }
2139b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2140b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21412593348eSBarry Smith   B->factor           = 0;
21422593348eSBarry Smith   B->lupivotthreshold = 1.0;
214390f02eecSBarry Smith   B->mapping          = 0;
21442593348eSBarry Smith   b->row              = 0;
21452593348eSBarry Smith   b->col              = 0;
21462593348eSBarry Smith   b->reallocs         = 0;
21472593348eSBarry Smith 
214844cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
214944cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2150b6490206SBarry Smith   b->mbs     = mbs;
2151f2501298SSatish Balay   b->nbs     = nbs;
2152b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21532593348eSBarry Smith   if (nnz == PETSC_NULL) {
2154119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21552593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2156b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2157b6490206SBarry Smith     nz = nz*mbs;
21582593348eSBarry Smith   }
21592593348eSBarry Smith   else {
21602593348eSBarry Smith     nz = 0;
2161b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21622593348eSBarry Smith   }
21632593348eSBarry Smith 
21642593348eSBarry Smith   /* allocate the matrix space */
21657e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21662593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21677e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21687e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21692593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21702593348eSBarry Smith   b->i  = b->j + nz;
21712593348eSBarry Smith   b->singlemalloc = 1;
21722593348eSBarry Smith 
2173de6a44a3SBarry Smith   b->i[0] = 0;
2174b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21752593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21762593348eSBarry Smith   }
21772593348eSBarry Smith 
2178b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2179b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2180f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2181b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21822593348eSBarry Smith 
2183b6490206SBarry Smith   b->bs               = bs;
21849df24120SSatish Balay   b->bs2              = bs2;
2185b6490206SBarry Smith   b->mbs              = mbs;
21862593348eSBarry Smith   b->nz               = 0;
218718e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21882593348eSBarry Smith   b->sorted           = 0;
21892593348eSBarry Smith   b->roworiented      = 1;
21902593348eSBarry Smith   b->nonew            = 0;
21912593348eSBarry Smith   b->diag             = 0;
21922593348eSBarry Smith   b->solve_work       = 0;
2193de6a44a3SBarry Smith   b->mult_work        = 0;
21942593348eSBarry Smith   b->spptr            = 0;
21954e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
21964e220ebcSLois Curfman McInnes 
21972593348eSBarry Smith   *A = B;
21982593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
21992593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
22002593348eSBarry Smith   return 0;
22012593348eSBarry Smith }
22022593348eSBarry Smith 
22035615d1e5SSatish Balay #undef __FUNC__
22045615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2205b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
22062593348eSBarry Smith {
22072593348eSBarry Smith   Mat         C;
2208b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
22099df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2210de6a44a3SBarry Smith 
2211e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
22122593348eSBarry Smith 
22132593348eSBarry Smith   *B = 0;
2214f09e8eb9SSatish Balay   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
22152593348eSBarry Smith   PLogObjectCreate(C);
2216b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22172593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2218b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2219b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22202593348eSBarry Smith   C->factor     = A->factor;
22212593348eSBarry Smith   c->row        = 0;
22222593348eSBarry Smith   c->col        = 0;
22232593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22242593348eSBarry Smith 
222544cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
222644cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
222744cd7ae7SLois Curfman McInnes   C->M          = a->m;
222844cd7ae7SLois Curfman McInnes   C->N          = a->n;
222944cd7ae7SLois Curfman McInnes 
2230b6490206SBarry Smith   c->bs         = a->bs;
22319df24120SSatish Balay   c->bs2        = a->bs2;
2232b6490206SBarry Smith   c->mbs        = a->mbs;
2233b6490206SBarry Smith   c->nbs        = a->nbs;
22342593348eSBarry Smith 
2235b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2236b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2237b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22382593348eSBarry Smith     c->imax[i] = a->imax[i];
22392593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22402593348eSBarry Smith   }
22412593348eSBarry Smith 
22422593348eSBarry Smith   /* allocate the matrix space */
22432593348eSBarry Smith   c->singlemalloc = 1;
22447e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22452593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22467e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2247de6a44a3SBarry Smith   c->i  = c->j + nz;
2248b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2249b6490206SBarry Smith   if (mbs > 0) {
2250de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22512593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22527e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22532593348eSBarry Smith     }
22542593348eSBarry Smith   }
22552593348eSBarry Smith 
2256f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
22572593348eSBarry Smith   c->sorted      = a->sorted;
22582593348eSBarry Smith   c->roworiented = a->roworiented;
22592593348eSBarry Smith   c->nonew       = a->nonew;
22602593348eSBarry Smith 
22612593348eSBarry Smith   if (a->diag) {
2262b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2263b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2264b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22652593348eSBarry Smith       c->diag[i] = a->diag[i];
22662593348eSBarry Smith     }
22672593348eSBarry Smith   }
22682593348eSBarry Smith   else c->diag          = 0;
22692593348eSBarry Smith   c->nz                 = a->nz;
22702593348eSBarry Smith   c->maxnz              = a->maxnz;
22712593348eSBarry Smith   c->solve_work         = 0;
22722593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22737fc0212eSBarry Smith   c->mult_work          = 0;
22742593348eSBarry Smith   *B = C;
22752593348eSBarry Smith   return 0;
22762593348eSBarry Smith }
22772593348eSBarry Smith 
22785615d1e5SSatish Balay #undef __FUNC__
22795615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
228019bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22812593348eSBarry Smith {
2282b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22832593348eSBarry Smith   Mat          B;
2284de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2285b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
228635aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2287a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2288b6490206SBarry Smith   Scalar       *aa;
228919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22902593348eSBarry Smith 
22911a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2292de6a44a3SBarry Smith   bs2  = bs*bs;
2293b6490206SBarry Smith 
22942593348eSBarry Smith   MPI_Comm_size(comm,&size);
2295e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
229690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
229777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2298e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
22992593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
23002593348eSBarry Smith 
2301e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
230235aab85fSBarry Smith 
230335aab85fSBarry Smith   /*
230435aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
230535aab85fSBarry Smith     divisible by the blocksize
230635aab85fSBarry Smith   */
2307b6490206SBarry Smith   mbs        = M/bs;
230835aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
230935aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
231035aab85fSBarry Smith   else                  mbs++;
231135aab85fSBarry Smith   if (extra_rows) {
2312537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
231335aab85fSBarry Smith   }
2314b6490206SBarry Smith 
23152593348eSBarry Smith   /* read in row lengths */
231635aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
231777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
231835aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23192593348eSBarry Smith 
2320b6490206SBarry Smith   /* read in column indices */
232135aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
232277c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
232335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2324b6490206SBarry Smith 
2325b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2326b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2327b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
232835aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
232935aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
233035aab85fSBarry Smith   masked = mask + mbs;
2331b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2332b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
233335aab85fSBarry Smith     nmask = 0;
2334b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2335b6490206SBarry Smith       kmax = rowlengths[rowcount];
2336b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
233735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
233835aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2339b6490206SBarry Smith       }
2340b6490206SBarry Smith       rowcount++;
2341b6490206SBarry Smith     }
234235aab85fSBarry Smith     browlengths[i] += nmask;
234335aab85fSBarry Smith     /* zero out the mask elements we set */
234435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2345b6490206SBarry Smith   }
2346b6490206SBarry Smith 
23472593348eSBarry Smith   /* create our matrix */
2348*3eee16b0SBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
23492593348eSBarry Smith   B = *A;
2350b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23512593348eSBarry Smith 
23522593348eSBarry Smith   /* set matrix "i" values */
2353de6a44a3SBarry Smith   a->i[0] = 0;
2354b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2355b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2356b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23572593348eSBarry Smith   }
2358b6490206SBarry Smith   a->nz         = 0;
2359b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23602593348eSBarry Smith 
2361b6490206SBarry Smith   /* read in nonzero values */
236235aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
236377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
236435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2365b6490206SBarry Smith 
2366b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2367b6490206SBarry Smith   nzcount = 0; jcount = 0;
2368b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2369b6490206SBarry Smith     nzcountb = nzcount;
237035aab85fSBarry Smith     nmask    = 0;
2371b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2372b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2373b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
237435aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
237535aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2376b6490206SBarry Smith       }
2377b6490206SBarry Smith       rowcount++;
2378b6490206SBarry Smith     }
2379de6a44a3SBarry Smith     /* sort the masked values */
238077c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2381de6a44a3SBarry Smith 
2382b6490206SBarry Smith     /* set "j" values into matrix */
2383b6490206SBarry Smith     maskcount = 1;
238435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
238535aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2386de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2387b6490206SBarry Smith     }
2388b6490206SBarry Smith     /* set "a" values into matrix */
2389de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2390b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2391b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2392b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2393de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2394de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2395de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2396de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2397b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2398b6490206SBarry Smith       }
2399b6490206SBarry Smith     }
240035aab85fSBarry Smith     /* zero out the mask elements we set */
240135aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2402b6490206SBarry Smith   }
2403e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2404b6490206SBarry Smith 
2405b6490206SBarry Smith   PetscFree(rowlengths);
2406b6490206SBarry Smith   PetscFree(browlengths);
2407b6490206SBarry Smith   PetscFree(aa);
2408b6490206SBarry Smith   PetscFree(jj);
2409b6490206SBarry Smith   PetscFree(mask);
2410b6490206SBarry Smith 
2411b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2412de6a44a3SBarry Smith 
2413de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2414de6a44a3SBarry Smith   if (flg) {
241519bcc07fSBarry Smith     Viewer tviewer;
241619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2417639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
241819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
241919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2420de6a44a3SBarry Smith   }
2421de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2422de6a44a3SBarry Smith   if (flg) {
242319bcc07fSBarry Smith     Viewer tviewer;
242419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2425639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
242619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
242719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2428de6a44a3SBarry Smith   }
2429de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2430de6a44a3SBarry Smith   if (flg) {
243119bcc07fSBarry Smith     Viewer tviewer;
243219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
243319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
243419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2435de6a44a3SBarry Smith   }
2436de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2437de6a44a3SBarry Smith   if (flg) {
243819bcc07fSBarry Smith     Viewer tviewer;
243919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2440639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
244119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
244219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2443de6a44a3SBarry Smith   }
2444de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2445de6a44a3SBarry Smith   if (flg) {
244619bcc07fSBarry Smith     Viewer tviewer;
244719bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
244819bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
244919bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
245019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2451de6a44a3SBarry Smith   }
24522593348eSBarry Smith   return 0;
24532593348eSBarry Smith }
24542593348eSBarry Smith 
24552593348eSBarry Smith 
24562593348eSBarry Smith 
2457