xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f09e8eb94a771781a812a8d81a9ca3d36ec35eba)
12593348eSBarry Smith #ifndef lint
2*f09e8eb9SSatish Balay static char vcid[] = "$Id: baij.c,v 1.101 1997/05/03 22:48:36 curfman Exp balay $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
93369ce9aSBarry Smith 
103369ce9aSBarry Smith #include "pinclude/pviewer.h"
113369ce9aSBarry Smith #include "sys.h"
1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
131a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
141a6a6d98SBarry Smith #include "src/inline/spops.h"
152593348eSBarry Smith #include "petsc.h"
163b547af2SSatish Balay 
172593348eSBarry Smith 
18de6a44a3SBarry Smith /*
19de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
20de6a44a3SBarry Smith */
21de6a44a3SBarry Smith 
225615d1e5SSatish Balay #undef __FUNC__
235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
25de6a44a3SBarry Smith {
26de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
277fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
28de6a44a3SBarry Smith 
29de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
317fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
32de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33de6a44a3SBarry Smith       if (a->j[j] == i) {
34de6a44a3SBarry Smith         diag[i] = j;
35de6a44a3SBarry Smith         break;
36de6a44a3SBarry Smith       }
37de6a44a3SBarry Smith     }
38de6a44a3SBarry Smith   }
39de6a44a3SBarry Smith   a->diag = diag;
40de6a44a3SBarry Smith   return 0;
41de6a44a3SBarry Smith }
422593348eSBarry Smith 
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
11966963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
13466963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
2175615d1e5SSatish Balay #undef __FUNC__
2185eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
3445615d1e5SSatish Balay #undef __FUNC__
3455eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */
3468f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
3705615d1e5SSatish Balay #undef __FUNC__
3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417f1241b54SBarry Smith           goto noinsert1;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
42111ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
422cd0e1443SSatish Balay       if (nrow >= rmax) {
423cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
424cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
425cd0e1443SSatish Balay         Scalar *new_a;
426cd0e1443SSatish Balay 
42711ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
42896854ed6SLois Curfman McInnes 
42996854ed6SLois Curfman McInnes         /* Malloc new storage space */
4307e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
431cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4327e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
433cd0e1443SSatish Balay         new_i   = new_j + new_nz;
434cd0e1443SSatish Balay 
435cd0e1443SSatish Balay         /* copy over old data into new slots */
436cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4377e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
438a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
439a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
440a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
441cd0e1443SSatish Balay                                                            len*sizeof(int));
442a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
443a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
444a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
445a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
446cd0e1443SSatish Balay         /* free up old matrix storage */
447cd0e1443SSatish Balay         PetscFree(a->a);
448cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
449cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
450cd0e1443SSatish Balay         a->singlemalloc = 1;
451cd0e1443SSatish Balay 
452a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
453cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4547e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45518e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
456cd0e1443SSatish Balay         a->reallocs++;
457119a934aSSatish Balay         a->nz++;
458cd0e1443SSatish Balay       }
4597e67e3f9SSatish Balay       N = nrow++ - 1;
460cd0e1443SSatish Balay       /* shift up all the later entries in this row */
461cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
462cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4637e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
464cd0e1443SSatish Balay       }
4657e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
466cd0e1443SSatish Balay       rp[i]                      = bcol;
4677e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
468f1241b54SBarry Smith       noinsert1:;
469cd0e1443SSatish Balay       low = i;
470cd0e1443SSatish Balay     }
471cd0e1443SSatish Balay     ailen[brow] = nrow;
472cd0e1443SSatish Balay   }
473cd0e1443SSatish Balay   return 0;
474cd0e1443SSatish Balay }
475cd0e1443SSatish Balay 
4765615d1e5SSatish Balay #undef __FUNC__
47792c4ed94SBarry Smith #define __FUNC__ "MatSetValues_SeqBAIJ"
47892c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
47992c4ed94SBarry Smith {
48092c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4818a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
4820e324ae4SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
4830e324ae4SSatish Balay   int         roworiented=a->roworiented;
4848a84c255SSatish Balay   int         *aj=a->j,nonew=a->nonew;
4850e324ae4SSatish Balay   int          bs2=a->bs2,bs=a->bs,stepval;
4860e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
48792c4ed94SBarry Smith 
4880e324ae4SSatish Balay   if (roworiented) {
4890e324ae4SSatish Balay     stepval = (n-1)*bs;
4900e324ae4SSatish Balay   } else {
4910e324ae4SSatish Balay     stepval = (m-1)*bs;
4920e324ae4SSatish Balay   }
49392c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
49492c4ed94SBarry Smith     row  = im[k];
49592c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
49692c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
4978a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
49892c4ed94SBarry Smith #endif
49992c4ed94SBarry Smith     rp   = aj + ai[row];
50092c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
50192c4ed94SBarry Smith     rmax = imax[row];
50292c4ed94SBarry Smith     nrow = ailen[row];
50392c4ed94SBarry Smith     low  = 0;
50492c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
50592c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50692c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5078a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
50892c4ed94SBarry Smith #endif
50992c4ed94SBarry Smith       col = in[l];
51092c4ed94SBarry Smith       if (roworiented) {
5110e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5120e324ae4SSatish Balay       } else {
5130e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
51492c4ed94SBarry Smith       }
51592c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
51692c4ed94SBarry Smith       while (high-low > 7) {
51792c4ed94SBarry Smith         t = (low+high)/2;
51892c4ed94SBarry Smith         if (rp[t] > col) high = t;
51992c4ed94SBarry Smith         else             low  = t;
52092c4ed94SBarry Smith       }
52192c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
52292c4ed94SBarry Smith         if (rp[i] > col) break;
52392c4ed94SBarry Smith         if (rp[i] == col) {
5248a84c255SSatish Balay           bap  = ap +  bs2*i;
5250e324ae4SSatish Balay           if (roworiented) {
5268a84c255SSatish Balay             if (is == ADD_VALUES) {
5270e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5288a84c255SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5298a84c255SSatish Balay                   bap[jj] += *value++;
5300e324ae4SSatish Balay             } else {
5310e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5320e324ae4SSatish Balay                 for (jj=ii; jj<bs2; jj+=bs )
5330e324ae4SSatish Balay                   bap[jj] = *value++;
5348a84c255SSatish Balay             }
5350e324ae4SSatish Balay           } else {
5360e324ae4SSatish Balay             if (is == ADD_VALUES) {
5370e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5380e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5390e324ae4SSatish Balay                   *bap++ += *value++;
5400e324ae4SSatish Balay             } else {
5410e324ae4SSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval )
5420e324ae4SSatish Balay                 for (jj=0; jj<bs; jj++ )
5430e324ae4SSatish Balay                   *bap++  = *value++;
5440e324ae4SSatish Balay             }
5458a84c255SSatish Balay           }
546f1241b54SBarry Smith           goto noinsert2;
54792c4ed94SBarry Smith         }
54892c4ed94SBarry Smith       }
54989280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
55011ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
55192c4ed94SBarry Smith       if (nrow >= rmax) {
55292c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
55392c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
55492c4ed94SBarry Smith         Scalar *new_a;
55592c4ed94SBarry Smith 
55611ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
55789280ab3SLois Curfman McInnes 
55892c4ed94SBarry Smith         /* malloc new storage space */
55992c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
56092c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
56192c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
56292c4ed94SBarry Smith         new_i   = new_j + new_nz;
56392c4ed94SBarry Smith 
56492c4ed94SBarry Smith         /* copy over old data into new slots */
56592c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
56692c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
56792c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
56892c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
56992c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
57092c4ed94SBarry Smith                                                            len*sizeof(int));
57192c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
57292c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
57392c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
57492c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
57592c4ed94SBarry Smith         /* free up old matrix storage */
57692c4ed94SBarry Smith         PetscFree(a->a);
57792c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
57892c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
57992c4ed94SBarry Smith         a->singlemalloc = 1;
58092c4ed94SBarry Smith 
58192c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
58292c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
58392c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
58492c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
58592c4ed94SBarry Smith         a->reallocs++;
58692c4ed94SBarry Smith         a->nz++;
58792c4ed94SBarry Smith       }
58892c4ed94SBarry Smith       N = nrow++ - 1;
58992c4ed94SBarry Smith       /* shift up all the later entries in this row */
59092c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
59192c4ed94SBarry Smith         rp[ii+1] = rp[ii];
59292c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
59392c4ed94SBarry Smith       }
59492c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
59592c4ed94SBarry Smith       rp[i] = col;
5968a84c255SSatish Balay       bap   = ap +  bs2*i;
5970e324ae4SSatish Balay       if (roworiented) {
5980e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval)
5998a84c255SSatish Balay           for (jj=ii; jj<bs2; jj+=bs )
6000e324ae4SSatish Balay             bap[jj] = *value++;
6010e324ae4SSatish Balay       } else {
6020e324ae4SSatish Balay         for ( ii=0; ii<bs; ii++,value+=stepval )
6030e324ae4SSatish Balay           for (jj=0; jj<bs; jj++ )
6040e324ae4SSatish Balay             *bap++  = *value++;
6050e324ae4SSatish Balay       }
606f1241b54SBarry Smith       noinsert2:;
60792c4ed94SBarry Smith       low = i;
60892c4ed94SBarry Smith     }
60992c4ed94SBarry Smith     ailen[row] = nrow;
61092c4ed94SBarry Smith   }
61192c4ed94SBarry Smith   return 0;
61292c4ed94SBarry Smith }
61392c4ed94SBarry Smith 
61492c4ed94SBarry Smith #undef __FUNC__
6155eea60f9SBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */
6168f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6170b824a48SSatish Balay {
6180b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6190b824a48SSatish Balay   *m = a->m; *n = a->n;
6200b824a48SSatish Balay   return 0;
6210b824a48SSatish Balay }
6220b824a48SSatish Balay 
6235615d1e5SSatish Balay #undef __FUNC__
6245eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */
6258f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6260b824a48SSatish Balay {
6270b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6280b824a48SSatish Balay   *m = 0; *n = a->m;
6290b824a48SSatish Balay   return 0;
6300b824a48SSatish Balay }
6310b824a48SSatish Balay 
6325615d1e5SSatish Balay #undef __FUNC__
6335615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6349867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6359867e207SSatish Balay {
6369867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6377e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6389867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6399867e207SSatish Balay 
6409867e207SSatish Balay   bs  = a->bs;
6419867e207SSatish Balay   ai  = a->i;
6429867e207SSatish Balay   aj  = a->j;
6439867e207SSatish Balay   aa  = a->a;
6449df24120SSatish Balay   bs2 = a->bs2;
6459867e207SSatish Balay 
646e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6479867e207SSatish Balay 
6489867e207SSatish Balay   bn  = row/bs;   /* Block number */
6499867e207SSatish Balay   bp  = row % bs; /* Block Position */
6509867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6519867e207SSatish Balay   *nz = bs*M;
6529867e207SSatish Balay 
6539867e207SSatish Balay   if (v) {
6549867e207SSatish Balay     *v = 0;
6559867e207SSatish Balay     if (*nz) {
6569867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6579867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6589867e207SSatish Balay         v_i  = *v + i*bs;
6597e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6607e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6619867e207SSatish Balay       }
6629867e207SSatish Balay     }
6639867e207SSatish Balay   }
6649867e207SSatish Balay 
6659867e207SSatish Balay   if (idx) {
6669867e207SSatish Balay     *idx = 0;
6679867e207SSatish Balay     if (*nz) {
6689867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6699867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6709867e207SSatish Balay         idx_i = *idx + i*bs;
6719867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6729867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6739867e207SSatish Balay       }
6749867e207SSatish Balay     }
6759867e207SSatish Balay   }
6769867e207SSatish Balay   return 0;
6779867e207SSatish Balay }
6789867e207SSatish Balay 
6795615d1e5SSatish Balay #undef __FUNC__
6805eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */
6819867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6829867e207SSatish Balay {
6839867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6849867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6859867e207SSatish Balay   return 0;
6869867e207SSatish Balay }
687b6490206SBarry Smith 
6885615d1e5SSatish Balay #undef __FUNC__
6895615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
6908f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
691f2501298SSatish Balay {
692f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
693f2501298SSatish Balay   Mat         C;
694f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
6959df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
696f2501298SSatish Balay   Scalar      *array=a->a;
697f2501298SSatish Balay 
698f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
699e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
700f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
701f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
702a7c10996SSatish Balay 
703a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
704f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
705f2501298SSatish Balay   PetscFree(col);
706f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
707f2501298SSatish Balay   cols = rows + bs;
708f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
709f2501298SSatish Balay     cols[0] = i*bs;
710f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
711f2501298SSatish Balay     len = ai[i+1] - ai[i];
712f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
713f2501298SSatish Balay       rows[0] = (*aj++)*bs;
714f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
715f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
716f2501298SSatish Balay       array += bs2;
717f2501298SSatish Balay     }
718f2501298SSatish Balay   }
7191073c447SSatish Balay   PetscFree(rows);
720f2501298SSatish Balay 
7216d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7226d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
723f2501298SSatish Balay 
724f2501298SSatish Balay   if (B != PETSC_NULL) {
725f2501298SSatish Balay     *B = C;
726f2501298SSatish Balay   } else {
727f2501298SSatish Balay     /* This isn't really an in-place transpose */
728f2501298SSatish Balay     PetscFree(a->a);
729f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
730f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
731f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
732f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
733f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
734f2501298SSatish Balay     PetscFree(a);
735*f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
736f2501298SSatish Balay     PetscHeaderDestroy(C);
737f2501298SSatish Balay   }
738f2501298SSatish Balay   return 0;
739f2501298SSatish Balay }
740f2501298SSatish Balay 
741f2501298SSatish Balay 
7425615d1e5SSatish Balay #undef __FUNC__
7435615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7448f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
745584200bdSSatish Balay {
746584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
747584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
748a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
749d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
750584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
751584200bdSSatish Balay 
7526d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
753584200bdSSatish Balay 
754d402145bSBarry Smith   rmax = ailen[0];
755584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
756584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
757584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
758d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
759584200bdSSatish Balay     if (fshift) {
760a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
761584200bdSSatish Balay       N = ailen[i];
762584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
763584200bdSSatish Balay         ip[j-fshift] = ip[j];
7647e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
765584200bdSSatish Balay       }
766584200bdSSatish Balay     }
767584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
768584200bdSSatish Balay   }
769584200bdSSatish Balay   if (mbs) {
770584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
771584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
772584200bdSSatish Balay   }
773584200bdSSatish Balay   /* reset ilen and imax for each row */
774584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
775584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
776584200bdSSatish Balay   }
777a7c10996SSatish Balay   a->nz = ai[mbs];
778584200bdSSatish Balay 
779584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
780584200bdSSatish Balay   if (fshift && a->diag) {
781584200bdSSatish Balay     PetscFree(a->diag);
782584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
783584200bdSSatish Balay     a->diag = 0;
784584200bdSSatish Balay   }
7853d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
786219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7873d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
788584200bdSSatish Balay            a->reallocs);
789d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
790e2f3b5e9SSatish Balay   a->reallocs          = 0;
7914e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7924e220ebcSLois Curfman McInnes 
793584200bdSSatish Balay   return 0;
794584200bdSSatish Balay }
795584200bdSSatish Balay 
7965615d1e5SSatish Balay #undef __FUNC__
7975615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
7988f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
7992593348eSBarry Smith {
800b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8019df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8022593348eSBarry Smith   return 0;
8032593348eSBarry Smith }
8042593348eSBarry Smith 
8055615d1e5SSatish Balay #undef __FUNC__
8065eea60f9SBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */
807b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8082593348eSBarry Smith {
8092593348eSBarry Smith   Mat         A  = (Mat) obj;
810b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
81190f02eecSBarry Smith   int         ierr;
8122593348eSBarry Smith 
8132593348eSBarry Smith #if defined(PETSC_LOG)
8142593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8152593348eSBarry Smith #endif
8162593348eSBarry Smith   PetscFree(a->a);
8172593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8182593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8192593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8202593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8212593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
822de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8232593348eSBarry Smith   PetscFree(a);
82490f02eecSBarry Smith   if (A->mapping) {
82590f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
82690f02eecSBarry Smith   }
827f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
828f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8292593348eSBarry Smith   return 0;
8302593348eSBarry Smith }
8312593348eSBarry Smith 
8325615d1e5SSatish Balay #undef __FUNC__
8335eea60f9SBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */
8348f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8352593348eSBarry Smith {
836b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8376d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8386d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8396d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
840219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8416d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
842c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
84396854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8446d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8456d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
846219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8476d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8486d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
84990f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8502b362799SSatish Balay            op == MAT_IGNORE_OFF_PROC_ENTRIES)
85194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8526d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
853e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8542593348eSBarry Smith   else
855e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8562593348eSBarry Smith   return 0;
8572593348eSBarry Smith }
8582593348eSBarry Smith 
8592593348eSBarry Smith 
8602593348eSBarry Smith /* -------------------------------------------------------*/
8612593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8622593348eSBarry Smith /* -------------------------------------------------------*/
863b6490206SBarry Smith #include "pinclude/plapack.h"
864b6490206SBarry Smith 
8655615d1e5SSatish Balay #undef __FUNC__
8665615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
86739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8682593348eSBarry Smith {
869b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
87039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
871c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8722593348eSBarry Smith 
873c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
874c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
875b6490206SBarry Smith 
876119a934aSSatish Balay   idx   = a->j;
877119a934aSSatish Balay   v     = a->a;
878119a934aSSatish Balay   ii    = a->i;
879119a934aSSatish Balay 
880119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
881119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
882119a934aSSatish Balay     sum  = 0.0;
883119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8841a6a6d98SBarry Smith     z[i] = sum;
885119a934aSSatish Balay   }
886c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
887c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
88839b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
88939b95ed1SBarry Smith   return 0;
89039b95ed1SBarry Smith }
89139b95ed1SBarry Smith 
8925615d1e5SSatish Balay #undef __FUNC__
8935615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
89439b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
89539b95ed1SBarry Smith {
89639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
89739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
89839b95ed1SBarry Smith   register Scalar x1,x2;
89939b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
900c16cb8f2SBarry Smith   int             j,n;
90139b95ed1SBarry Smith 
902c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
903c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
90439b95ed1SBarry Smith 
90539b95ed1SBarry Smith   idx   = a->j;
90639b95ed1SBarry Smith   v     = a->a;
90739b95ed1SBarry Smith   ii    = a->i;
90839b95ed1SBarry Smith 
909119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
910119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
911119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
912119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
913119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
914119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
915119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
916119a934aSSatish Balay       v += 4;
917119a934aSSatish Balay     }
9181a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
919119a934aSSatish Balay     z += 2;
920119a934aSSatish Balay   }
921c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
922c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
92339b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
92439b95ed1SBarry Smith   return 0;
92539b95ed1SBarry Smith }
92639b95ed1SBarry Smith 
9275615d1e5SSatish Balay #undef __FUNC__
9285615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
92939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
93039b95ed1SBarry Smith {
93139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
93239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
933c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
93439b95ed1SBarry Smith 
935c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
936c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
93739b95ed1SBarry Smith 
93839b95ed1SBarry Smith   idx   = a->j;
93939b95ed1SBarry Smith   v     = a->a;
94039b95ed1SBarry Smith   ii    = a->i;
94139b95ed1SBarry Smith 
942119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
943119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
944119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
945119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
946119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
947119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
948119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
949119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
950119a934aSSatish Balay       v += 9;
951119a934aSSatish Balay     }
9521a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
953119a934aSSatish Balay     z += 3;
954119a934aSSatish Balay   }
955c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
956c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
95739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
95839b95ed1SBarry Smith   return 0;
95939b95ed1SBarry Smith }
96039b95ed1SBarry Smith 
9615615d1e5SSatish Balay #undef __FUNC__
9625615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
96339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
96439b95ed1SBarry Smith {
96539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
96639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
96739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
96839b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
969c16cb8f2SBarry Smith   int             j,n;
97039b95ed1SBarry Smith 
971c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
972c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
97339b95ed1SBarry Smith 
97439b95ed1SBarry Smith   idx   = a->j;
97539b95ed1SBarry Smith   v     = a->a;
97639b95ed1SBarry Smith   ii    = a->i;
97739b95ed1SBarry Smith 
978119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
979119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
980119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
981119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
982119a934aSSatish Balay       xb = x + 4*(*idx++);
983119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
984119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
985119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
986119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
987119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
988119a934aSSatish Balay       v += 16;
989119a934aSSatish Balay     }
9901a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
991119a934aSSatish Balay     z += 4;
992119a934aSSatish Balay   }
993c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
994c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
99539b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
99639b95ed1SBarry Smith   return 0;
99739b95ed1SBarry Smith }
99839b95ed1SBarry Smith 
9995615d1e5SSatish Balay #undef __FUNC__
10005615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
100139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
100239b95ed1SBarry Smith {
100339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
100439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
100539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
1006c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
100739b95ed1SBarry Smith 
1008c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1009c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
101039b95ed1SBarry Smith 
101139b95ed1SBarry Smith   idx   = a->j;
101239b95ed1SBarry Smith   v     = a->a;
101339b95ed1SBarry Smith   ii    = a->i;
101439b95ed1SBarry Smith 
1015119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1016119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1017119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1018119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1019119a934aSSatish Balay       xb = x + 5*(*idx++);
1020119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1021119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1022119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1023119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1024119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1025119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1026119a934aSSatish Balay       v += 25;
1027119a934aSSatish Balay     }
10281a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1029119a934aSSatish Balay     z += 5;
1030119a934aSSatish Balay   }
1031c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1032c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
103339b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
103439b95ed1SBarry Smith   return 0;
103539b95ed1SBarry Smith }
103639b95ed1SBarry Smith 
10375615d1e5SSatish Balay #undef __FUNC__
103848e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
103948e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
104048e9ddb2SSatish Balay {
104148e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
104248e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
104348e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
104448e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
104548e9ddb2SSatish Balay 
104648e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
104748e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
104848e9ddb2SSatish Balay 
104948e9ddb2SSatish Balay   idx   = a->j;
105048e9ddb2SSatish Balay   v     = a->a;
105148e9ddb2SSatish Balay   ii    = a->i;
105248e9ddb2SSatish Balay 
105348e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
105448e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
105548e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
105648e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
105748e9ddb2SSatish Balay       xb = x + 7*(*idx++);
105848e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
105948e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
106048e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
106148e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
106248e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
106348e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
106448e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
106548e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
106648e9ddb2SSatish Balay       v += 49;
106748e9ddb2SSatish Balay     }
106848e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
106948e9ddb2SSatish Balay     z += 7;
107048e9ddb2SSatish Balay   }
107148e9ddb2SSatish Balay 
107248e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
107348e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
107448e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
107548e9ddb2SSatish Balay   return 0;
107648e9ddb2SSatish Balay }
107748e9ddb2SSatish Balay 
107848e9ddb2SSatish Balay #undef __FUNC__
10795615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
108039b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
108139b95ed1SBarry Smith {
108239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
108339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1084c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1085c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1086c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
108739b95ed1SBarry Smith 
1088c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1089c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
109039b95ed1SBarry Smith 
109139b95ed1SBarry Smith   idx   = a->j;
109239b95ed1SBarry Smith   v     = a->a;
109339b95ed1SBarry Smith   ii    = a->i;
109439b95ed1SBarry Smith 
109539b95ed1SBarry Smith 
1096119a934aSSatish Balay   if (!a->mult_work) {
10973b547af2SSatish Balay     k = PetscMax(a->m,a->n);
10982b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1099119a934aSSatish Balay   }
1100119a934aSSatish Balay   work = a->mult_work;
1101119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1102119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1103119a934aSSatish Balay     ncols = n*bs;
1104119a934aSSatish Balay     workt = work;
1105119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1106119a934aSSatish Balay       xb = x + bs*(*idx++);
1107119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1108119a934aSSatish Balay       workt += bs;
1109119a934aSSatish Balay     }
11101a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1111119a934aSSatish Balay     v += n*bs2;
1112119a934aSSatish Balay     z += bs;
1113119a934aSSatish Balay   }
1114c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1115c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11161a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1117bb42667fSSatish Balay   return 0;
1118bb42667fSSatish Balay }
1119bb42667fSSatish Balay 
11205615d1e5SSatish Balay #undef __FUNC__
11215615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1122f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1123f44d3a6dSSatish Balay {
1124f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1125f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1126c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1127f44d3a6dSSatish Balay 
1128c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1129c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1130c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1131f44d3a6dSSatish Balay 
1132f44d3a6dSSatish Balay   idx   = a->j;
1133f44d3a6dSSatish Balay   v     = a->a;
1134f44d3a6dSSatish Balay   ii    = a->i;
1135f44d3a6dSSatish Balay 
1136f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1137f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1138f44d3a6dSSatish Balay     sum  = y[i];
1139f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1140f44d3a6dSSatish Balay     z[i] = sum;
1141f44d3a6dSSatish Balay   }
1142c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1143c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1144c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1145f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1146f44d3a6dSSatish Balay   return 0;
1147f44d3a6dSSatish Balay }
1148f44d3a6dSSatish Balay 
11495615d1e5SSatish Balay #undef __FUNC__
11505615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1151f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1152f44d3a6dSSatish Balay {
1153f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1154f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1155f44d3a6dSSatish Balay   register Scalar x1,x2;
1156f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1157c16cb8f2SBarry Smith   int             j,n;
1158f44d3a6dSSatish Balay 
1159c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1160c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1161c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1162f44d3a6dSSatish Balay 
1163f44d3a6dSSatish Balay   idx   = a->j;
1164f44d3a6dSSatish Balay   v     = a->a;
1165f44d3a6dSSatish Balay   ii    = a->i;
1166f44d3a6dSSatish Balay 
1167f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1168f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1169f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1170f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1171f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1172f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1173f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1174f44d3a6dSSatish Balay       v += 4;
1175f44d3a6dSSatish Balay     }
1176f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1177f44d3a6dSSatish Balay     z += 2; y += 2;
1178f44d3a6dSSatish Balay   }
1179c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1180c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1181c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1182f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1183f44d3a6dSSatish Balay   return 0;
1184f44d3a6dSSatish Balay }
1185f44d3a6dSSatish Balay 
11865615d1e5SSatish Balay #undef __FUNC__
11875615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1188f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1189f44d3a6dSSatish Balay {
1190f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1191f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1192c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1193f44d3a6dSSatish Balay 
1194c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1195c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1196c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1197f44d3a6dSSatish Balay 
1198f44d3a6dSSatish Balay   idx   = a->j;
1199f44d3a6dSSatish Balay   v     = a->a;
1200f44d3a6dSSatish Balay   ii    = a->i;
1201f44d3a6dSSatish Balay 
1202f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1203f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1204f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1205f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1206f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1207f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1208f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1209f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1210f44d3a6dSSatish Balay       v += 9;
1211f44d3a6dSSatish Balay     }
1212f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1213f44d3a6dSSatish Balay     z += 3; y += 3;
1214f44d3a6dSSatish Balay   }
1215c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1216c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1217c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1218f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1219f44d3a6dSSatish Balay   return 0;
1220f44d3a6dSSatish Balay }
1221f44d3a6dSSatish Balay 
12225615d1e5SSatish Balay #undef __FUNC__
12235615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1224f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1225f44d3a6dSSatish Balay {
1226f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1227f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1228f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1229f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1230c16cb8f2SBarry Smith   int             j,n;
1231f44d3a6dSSatish Balay 
1232c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1233c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1234c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1235f44d3a6dSSatish Balay 
1236f44d3a6dSSatish Balay   idx   = a->j;
1237f44d3a6dSSatish Balay   v     = a->a;
1238f44d3a6dSSatish Balay   ii    = a->i;
1239f44d3a6dSSatish Balay 
1240f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1241f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1242f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1243f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1244f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1245f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1246f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1247f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1248f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1249f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1250f44d3a6dSSatish Balay       v += 16;
1251f44d3a6dSSatish Balay     }
1252f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1253f44d3a6dSSatish Balay     z += 4; y += 4;
1254f44d3a6dSSatish Balay   }
1255c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1256c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1257c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1258f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1259f44d3a6dSSatish Balay   return 0;
1260f44d3a6dSSatish Balay }
1261f44d3a6dSSatish Balay 
12625615d1e5SSatish Balay #undef __FUNC__
12635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1264f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1265f44d3a6dSSatish Balay {
1266f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1267f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1268f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1269c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1270f44d3a6dSSatish Balay 
1271c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1272c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1273c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1274f44d3a6dSSatish Balay 
1275f44d3a6dSSatish Balay   idx   = a->j;
1276f44d3a6dSSatish Balay   v     = a->a;
1277f44d3a6dSSatish Balay   ii    = a->i;
1278f44d3a6dSSatish Balay 
1279f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1280f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1281f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1282f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1283f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1284f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1285f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1286f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1287f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1288f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1289f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1290f44d3a6dSSatish Balay       v += 25;
1291f44d3a6dSSatish Balay     }
1292f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1293f44d3a6dSSatish Balay     z += 5; y += 5;
1294f44d3a6dSSatish Balay   }
1295c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1296c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1297c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1298f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1299f44d3a6dSSatish Balay   return 0;
1300f44d3a6dSSatish Balay }
1301f44d3a6dSSatish Balay 
13025615d1e5SSatish Balay #undef __FUNC__
130348e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
130448e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
130548e9ddb2SSatish Balay {
130648e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
130748e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
130848e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
130948e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
131048e9ddb2SSatish Balay 
131148e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
131248e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
131348e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
131448e9ddb2SSatish Balay 
131548e9ddb2SSatish Balay   idx   = a->j;
131648e9ddb2SSatish Balay   v     = a->a;
131748e9ddb2SSatish Balay   ii    = a->i;
131848e9ddb2SSatish Balay 
131948e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
132048e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
132148e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
132248e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
132348e9ddb2SSatish Balay       xb = x + 7*(*idx++);
132448e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
132548e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
132648e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
132748e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
132848e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
132948e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
133048e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
133148e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
133248e9ddb2SSatish Balay       v += 49;
133348e9ddb2SSatish Balay     }
133448e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
133548e9ddb2SSatish Balay     z += 7; y += 7;
133648e9ddb2SSatish Balay   }
133748e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
133848e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
133948e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
134048e9ddb2SSatish Balay   PLogFlops(98*a->nz);
134148e9ddb2SSatish Balay   return 0;
134248e9ddb2SSatish Balay }
134348e9ddb2SSatish Balay 
134448e9ddb2SSatish Balay 
134548e9ddb2SSatish Balay #undef __FUNC__
13465615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1347f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1348f44d3a6dSSatish Balay {
1349f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1350f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1351f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1352f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1353f44d3a6dSSatish Balay 
1354f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1355f44d3a6dSSatish Balay 
1356c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1357c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1358f44d3a6dSSatish Balay 
1359f44d3a6dSSatish Balay   idx   = a->j;
1360f44d3a6dSSatish Balay   v     = a->a;
1361f44d3a6dSSatish Balay   ii    = a->i;
1362f44d3a6dSSatish Balay 
1363f44d3a6dSSatish Balay 
1364f44d3a6dSSatish Balay   if (!a->mult_work) {
1365f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1366f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1367f44d3a6dSSatish Balay   }
1368f44d3a6dSSatish Balay   work = a->mult_work;
1369f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1370f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1371f44d3a6dSSatish Balay     ncols = n*bs;
1372f44d3a6dSSatish Balay     workt = work;
1373f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1374f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1375f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1376f44d3a6dSSatish Balay       workt += bs;
1377f44d3a6dSSatish Balay     }
1378f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1379f44d3a6dSSatish Balay     v += n*bs2;
1380f44d3a6dSSatish Balay     z += bs;
1381f44d3a6dSSatish Balay   }
1382c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1383c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1384f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1385f44d3a6dSSatish Balay   return 0;
1386f44d3a6dSSatish Balay }
1387f44d3a6dSSatish Balay 
13885615d1e5SSatish Balay #undef __FUNC__
13895615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
13908f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1391bb42667fSSatish Balay {
1392bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13931a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1394bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1395bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
13969df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1397119a934aSSatish Balay 
1398119a934aSSatish Balay 
139990f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
140090f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1401bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1402bb42667fSSatish Balay 
1403119a934aSSatish Balay   idx   = a->j;
1404119a934aSSatish Balay   v     = a->a;
1405119a934aSSatish Balay   ii    = a->i;
1406119a934aSSatish Balay 
1407119a934aSSatish Balay   switch (bs) {
1408119a934aSSatish Balay   case 1:
1409119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1410119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1411119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1412119a934aSSatish Balay       ib = idx + ai[i];
1413119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1414bb1453f0SSatish Balay         rval    = ib[j];
1415bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1416119a934aSSatish Balay       }
1417119a934aSSatish Balay     }
1418119a934aSSatish Balay     break;
1419119a934aSSatish Balay   case 2:
1420119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1421119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1422119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1423119a934aSSatish Balay       ib = idx + ai[i];
1424119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1425119a934aSSatish Balay         rval      = ib[j]*2;
1426bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1427bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1428119a934aSSatish Balay         v += 4;
1429119a934aSSatish Balay       }
1430119a934aSSatish Balay     }
1431119a934aSSatish Balay     break;
1432119a934aSSatish Balay   case 3:
1433119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1434119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1435119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1436119a934aSSatish Balay       ib = idx + ai[i];
1437119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1438119a934aSSatish Balay         rval      = ib[j]*3;
1439bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1440bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1441bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1442119a934aSSatish Balay         v += 9;
1443119a934aSSatish Balay       }
1444119a934aSSatish Balay     }
1445119a934aSSatish Balay     break;
1446119a934aSSatish Balay   case 4:
1447119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1448119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1449119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1450119a934aSSatish Balay       ib = idx + ai[i];
1451119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1452119a934aSSatish Balay         rval      = ib[j]*4;
1453bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1454bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1455bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1456bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1457119a934aSSatish Balay         v += 16;
1458119a934aSSatish Balay       }
1459119a934aSSatish Balay     }
1460119a934aSSatish Balay     break;
1461119a934aSSatish Balay   case 5:
1462119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1463119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1464119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1465119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1466119a934aSSatish Balay       ib = idx + ai[i];
1467119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1468119a934aSSatish Balay         rval      = ib[j]*5;
1469bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1470bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1471bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1472bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1473bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1474119a934aSSatish Balay         v += 25;
1475119a934aSSatish Balay       }
1476119a934aSSatish Balay     }
1477119a934aSSatish Balay     break;
1478119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1479119a934aSSatish Balay     default: {
1480119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1481119a934aSSatish Balay       if (!a->mult_work) {
14823b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1483bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1484119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1485119a934aSSatish Balay       }
1486119a934aSSatish Balay       work = a->mult_work;
1487119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1488119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1489119a934aSSatish Balay         ncols = n*bs;
1490119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1491119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1492119a934aSSatish Balay         v += n*bs2;
1493119a934aSSatish Balay         x += bs;
1494119a934aSSatish Balay         workt = work;
1495119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1496119a934aSSatish Balay           zb = z + bs*(*idx++);
1497bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1498119a934aSSatish Balay           workt += bs;
1499119a934aSSatish Balay         }
1500119a934aSSatish Balay       }
1501119a934aSSatish Balay     }
1502119a934aSSatish Balay   }
15030419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15040419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1505faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1506faf6580fSSatish Balay   return 0;
1507faf6580fSSatish Balay }
15081c351548SSatish Balay 
15095615d1e5SSatish Balay #undef __FUNC__
15105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15118f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1512faf6580fSSatish Balay {
1513faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1514faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1515faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1516faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1517faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1518faf6580fSSatish Balay 
1519faf6580fSSatish Balay 
1520faf6580fSSatish Balay 
152190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
152290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1523faf6580fSSatish Balay 
1524648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1525648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1526648c1d95SSatish Balay 
1527faf6580fSSatish Balay 
1528faf6580fSSatish Balay   idx   = a->j;
1529faf6580fSSatish Balay   v     = a->a;
1530faf6580fSSatish Balay   ii    = a->i;
1531faf6580fSSatish Balay 
1532faf6580fSSatish Balay   switch (bs) {
1533faf6580fSSatish Balay   case 1:
1534faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1535faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1536faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1537faf6580fSSatish Balay       ib = idx + ai[i];
1538faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1539faf6580fSSatish Balay         rval    = ib[j];
1540faf6580fSSatish Balay         z[rval] += *v++ * x1;
1541faf6580fSSatish Balay       }
1542faf6580fSSatish Balay     }
1543faf6580fSSatish Balay     break;
1544faf6580fSSatish Balay   case 2:
1545faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1546faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1547faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1548faf6580fSSatish Balay       ib = idx + ai[i];
1549faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1550faf6580fSSatish Balay         rval      = ib[j]*2;
1551faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1552faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1553faf6580fSSatish Balay         v += 4;
1554faf6580fSSatish Balay       }
1555faf6580fSSatish Balay     }
1556faf6580fSSatish Balay     break;
1557faf6580fSSatish Balay   case 3:
1558faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1559faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1560faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1561faf6580fSSatish Balay       ib = idx + ai[i];
1562faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1563faf6580fSSatish Balay         rval      = ib[j]*3;
1564faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1565faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1566faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1567faf6580fSSatish Balay         v += 9;
1568faf6580fSSatish Balay       }
1569faf6580fSSatish Balay     }
1570faf6580fSSatish Balay     break;
1571faf6580fSSatish Balay   case 4:
1572faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1573faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1574faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1575faf6580fSSatish Balay       ib = idx + ai[i];
1576faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1577faf6580fSSatish Balay         rval      = ib[j]*4;
1578faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1579faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1580faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1581faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1582faf6580fSSatish Balay         v += 16;
1583faf6580fSSatish Balay       }
1584faf6580fSSatish Balay     }
1585faf6580fSSatish Balay     break;
1586faf6580fSSatish Balay   case 5:
1587faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1588faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1589faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1590faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1591faf6580fSSatish Balay       ib = idx + ai[i];
1592faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1593faf6580fSSatish Balay         rval      = ib[j]*5;
1594faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1595faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1596faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1597faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1598faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1599faf6580fSSatish Balay         v += 25;
1600faf6580fSSatish Balay       }
1601faf6580fSSatish Balay     }
1602faf6580fSSatish Balay     break;
1603faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1604faf6580fSSatish Balay     default: {
1605faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1606faf6580fSSatish Balay       if (!a->mult_work) {
1607faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1608faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1609faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1610faf6580fSSatish Balay       }
1611faf6580fSSatish Balay       work = a->mult_work;
1612faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1613faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1614faf6580fSSatish Balay         ncols = n*bs;
1615faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1616faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1617faf6580fSSatish Balay         v += n*bs2;
1618faf6580fSSatish Balay         x += bs;
1619faf6580fSSatish Balay         workt = work;
1620faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1621faf6580fSSatish Balay           zb = z + bs*(*idx++);
1622faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1623faf6580fSSatish Balay           workt += bs;
1624faf6580fSSatish Balay         }
1625faf6580fSSatish Balay       }
1626faf6580fSSatish Balay     }
1627faf6580fSSatish Balay   }
1628faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1629faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1630faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16312593348eSBarry Smith   return 0;
16322593348eSBarry Smith }
16332593348eSBarry Smith 
16345615d1e5SSatish Balay #undef __FUNC__
16355eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */
16368f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16372593348eSBarry Smith {
1638b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16394e220ebcSLois Curfman McInnes 
16404e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16414e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16424e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16434e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16444e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16454e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16464e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16474e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16484e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16494e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16504e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16514e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16524e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16534e220ebcSLois Curfman McInnes   if (A->factor) {
16544e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16554e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16564e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16574e220ebcSLois Curfman McInnes   } else {
16584e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16594e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16604e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16614e220ebcSLois Curfman McInnes   }
16622593348eSBarry Smith   return 0;
16632593348eSBarry Smith }
16642593348eSBarry Smith 
16655615d1e5SSatish Balay #undef __FUNC__
16665615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
16678f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
166891d316f6SSatish Balay {
166991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
167091d316f6SSatish Balay 
1671e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
167291d316f6SSatish Balay 
167391d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
167491d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1675a7c10996SSatish Balay       (a->nz != b->nz)) {
167691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
167791d316f6SSatish Balay   }
167891d316f6SSatish Balay 
167991d316f6SSatish Balay   /* if the a->i are the same */
168091d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
168191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168291d316f6SSatish Balay   }
168391d316f6SSatish Balay 
168491d316f6SSatish Balay   /* if a->j are the same */
168591d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
168691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168791d316f6SSatish Balay   }
168891d316f6SSatish Balay 
168991d316f6SSatish Balay   /* if a->a are the same */
169091d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
169191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169291d316f6SSatish Balay   }
169391d316f6SSatish Balay   *flg = PETSC_TRUE;
169491d316f6SSatish Balay   return 0;
169591d316f6SSatish Balay 
169691d316f6SSatish Balay }
169791d316f6SSatish Balay 
16985615d1e5SSatish Balay #undef __FUNC__
16995615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17008f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
170191d316f6SSatish Balay {
170291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17037e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
170417e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
170517e48fc4SSatish Balay 
17060513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
170717e48fc4SSatish Balay   bs   = a->bs;
170817e48fc4SSatish Balay   aa   = a->a;
170917e48fc4SSatish Balay   ai   = a->i;
171017e48fc4SSatish Balay   aj   = a->j;
171117e48fc4SSatish Balay   ambs = a->mbs;
17129df24120SSatish Balay   bs2  = a->bs2;
171391d316f6SSatish Balay 
171491d316f6SSatish Balay   VecSet(&zero,v);
171590f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1716e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
171717e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
171817e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
171917e48fc4SSatish Balay       if (aj[j] == i) {
172017e48fc4SSatish Balay         row  = i*bs;
17217e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17227e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
172391d316f6SSatish Balay         break;
172491d316f6SSatish Balay       }
172591d316f6SSatish Balay     }
172691d316f6SSatish Balay   }
172791d316f6SSatish Balay   return 0;
172891d316f6SSatish Balay }
172991d316f6SSatish Balay 
17305615d1e5SSatish Balay #undef __FUNC__
17315615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17328f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17339867e207SSatish Balay {
17349867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17359867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17367e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17379867e207SSatish Balay 
17389867e207SSatish Balay   ai  = a->i;
17399867e207SSatish Balay   aj  = a->j;
17409867e207SSatish Balay   aa  = a->a;
17419867e207SSatish Balay   m   = a->m;
17429867e207SSatish Balay   n   = a->n;
17439867e207SSatish Balay   bs  = a->bs;
17449867e207SSatish Balay   mbs = a->mbs;
17459df24120SSatish Balay   bs2 = a->bs2;
17469867e207SSatish Balay   if (ll) {
174790f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1748e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17499867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17509867e207SSatish Balay       M  = ai[i+1] - ai[i];
17519867e207SSatish Balay       li = l + i*bs;
17527e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17539867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17547e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17559867e207SSatish Balay           (*v++) *= li[k%bs];
17569867e207SSatish Balay         }
17579867e207SSatish Balay       }
17589867e207SSatish Balay     }
17599867e207SSatish Balay   }
17609867e207SSatish Balay 
17619867e207SSatish Balay   if (rr) {
176290f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1763e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17649867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17659867e207SSatish Balay       M  = ai[i+1] - ai[i];
17667e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17679867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17689867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17699867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17709867e207SSatish Balay           x = ri[k];
17719867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17729867e207SSatish Balay         }
17739867e207SSatish Balay       }
17749867e207SSatish Balay     }
17759867e207SSatish Balay   }
17769867e207SSatish Balay   return 0;
17779867e207SSatish Balay }
17789867e207SSatish Balay 
17799867e207SSatish Balay 
1780b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1781b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17822a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1783736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1784736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17851a6a6d98SBarry Smith 
17861a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17871a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17881a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
17891a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
17901a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
17911a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
179248e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
17931a6a6d98SBarry Smith 
17947fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
17957fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
17967fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
17977fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
17987fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
17997fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18002593348eSBarry Smith 
18015615d1e5SSatish Balay #undef __FUNC__
18025615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18038f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18042593348eSBarry Smith {
1805b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18062593348eSBarry Smith   Scalar      *v = a->a;
18072593348eSBarry Smith   double      sum = 0.0;
18089df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18092593348eSBarry Smith 
18102593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18110419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18122593348eSBarry Smith #if defined(PETSC_COMPLEX)
18132593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18142593348eSBarry Smith #else
18152593348eSBarry Smith       sum += (*v)*(*v); v++;
18162593348eSBarry Smith #endif
18172593348eSBarry Smith     }
18182593348eSBarry Smith     *norm = sqrt(sum);
18192593348eSBarry Smith   }
18202593348eSBarry Smith   else {
1821e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18222593348eSBarry Smith   }
18232593348eSBarry Smith   return 0;
18242593348eSBarry Smith }
18252593348eSBarry Smith 
18262593348eSBarry Smith /*
18272593348eSBarry Smith      note: This can only work for identity for row and col. It would
18282593348eSBarry Smith    be good to check this and otherwise generate an error.
18292593348eSBarry Smith */
18305615d1e5SSatish Balay #undef __FUNC__
18315615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18328f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18332593348eSBarry Smith {
1834b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18352593348eSBarry Smith   Mat         outA;
1836de6a44a3SBarry Smith   int         ierr;
18372593348eSBarry Smith 
1838e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18392593348eSBarry Smith 
18402593348eSBarry Smith   outA          = inA;
18412593348eSBarry Smith   inA->factor   = FACTOR_LU;
18422593348eSBarry Smith   a->row        = row;
18432593348eSBarry Smith   a->col        = col;
18442593348eSBarry Smith 
1845de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
18462593348eSBarry Smith 
18472593348eSBarry Smith   if (!a->diag) {
1848b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18492593348eSBarry Smith   }
18507fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18512593348eSBarry Smith   return 0;
18522593348eSBarry Smith }
18532593348eSBarry Smith 
18545615d1e5SSatish Balay #undef __FUNC__
18555615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
18568f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18572593348eSBarry Smith {
1858b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18599df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1860b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1861b6490206SBarry Smith   PLogFlops(totalnz);
18622593348eSBarry Smith   return 0;
18632593348eSBarry Smith }
18642593348eSBarry Smith 
18655615d1e5SSatish Balay #undef __FUNC__
18665615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18678f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18687e67e3f9SSatish Balay {
18697e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18707e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1871a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18729df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18737e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18747e67e3f9SSatish Balay 
18757e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
18767e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1877e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1878e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1879a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
18807e67e3f9SSatish Balay     nrow = ailen[brow];
18817e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1882e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1883e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1884a7c10996SSatish Balay       col  = in[l] ;
18857e67e3f9SSatish Balay       bcol = col/bs;
18867e67e3f9SSatish Balay       cidx = col%bs;
18877e67e3f9SSatish Balay       ridx = row%bs;
18887e67e3f9SSatish Balay       high = nrow;
18897e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
18907e67e3f9SSatish Balay       while (high-low > 5) {
18917e67e3f9SSatish Balay         t = (low+high)/2;
18927e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
18937e67e3f9SSatish Balay         else             low  = t;
18947e67e3f9SSatish Balay       }
18957e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
18967e67e3f9SSatish Balay         if (rp[i] > bcol) break;
18977e67e3f9SSatish Balay         if (rp[i] == bcol) {
18987e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18997e67e3f9SSatish Balay           goto finished;
19007e67e3f9SSatish Balay         }
19017e67e3f9SSatish Balay       }
19027e67e3f9SSatish Balay       *v++ = zero;
19037e67e3f9SSatish Balay       finished:;
19047e67e3f9SSatish Balay     }
19057e67e3f9SSatish Balay   }
19067e67e3f9SSatish Balay   return 0;
19077e67e3f9SSatish Balay }
19087e67e3f9SSatish Balay 
19095615d1e5SSatish Balay #undef __FUNC__
19105eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */
19118f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19125a838052SSatish Balay {
19135a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19145a838052SSatish Balay   *bs = baij->bs;
19155a838052SSatish Balay   return 0;
19165a838052SSatish Balay }
19175a838052SSatish Balay 
1918d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19195615d1e5SSatish Balay #undef __FUNC__
19205615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1921d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1922d9b7c43dSSatish Balay {
1923d9b7c43dSSatish Balay   int i,row;
1924d9b7c43dSSatish Balay   row = idx[0];
1925d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1926d9b7c43dSSatish Balay 
1927d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1928d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1929d9b7c43dSSatish Balay   }
1930d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1931d9b7c43dSSatish Balay   return 0;
1932d9b7c43dSSatish Balay }
1933d9b7c43dSSatish Balay 
19345615d1e5SSatish Balay #undef __FUNC__
19355615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19368f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1937d9b7c43dSSatish Balay {
1938d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1939d9b7c43dSSatish Balay   IS          is_local;
1940d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1941d9b7c43dSSatish Balay   PetscTruth  flg;
1942d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1943d9b7c43dSSatish Balay 
1944d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1945d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1946d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1947537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1948d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1949d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1950d9b7c43dSSatish Balay 
1951d9b7c43dSSatish Balay   i = 0;
1952d9b7c43dSSatish Balay   while (i < is_n) {
1953e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1954d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1955d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1956d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1957d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1958d9b7c43dSSatish Balay       i += bs;
1959d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1960d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1961d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1962d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1963d9b7c43dSSatish Balay         aa[0] = zero;
1964d9b7c43dSSatish Balay         aa+=bs;
1965d9b7c43dSSatish Balay       }
1966d9b7c43dSSatish Balay       i++;
1967d9b7c43dSSatish Balay     }
1968d9b7c43dSSatish Balay   }
1969d9b7c43dSSatish Balay   if (diag) {
1970d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1971d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1972d9b7c43dSSatish Balay     }
1973d9b7c43dSSatish Balay   }
1974d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1975d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1976d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
19779a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1978d9b7c43dSSatish Balay 
1979d9b7c43dSSatish Balay   return 0;
1980d9b7c43dSSatish Balay }
19811c351548SSatish Balay 
19825615d1e5SSatish Balay #undef __FUNC__
19835eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */
1984ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1985ba4ca20aSSatish Balay {
1986ba4ca20aSSatish Balay   static int called = 0;
1987ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1988ba4ca20aSSatish Balay 
1989ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1990ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1991ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1992ba4ca20aSSatish Balay   return 0;
1993ba4ca20aSSatish Balay }
1994d9b7c43dSSatish Balay 
19952593348eSBarry Smith /* -------------------------------------------------------------------*/
1996cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
19979867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1998f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1999faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20001a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2001b6490206SBarry Smith        0,0,
2002de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2003b6490206SBarry Smith        0,
2004f2501298SSatish Balay        MatTranspose_SeqBAIJ,
200517e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20069867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2007584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2008b6490206SBarry Smith        0,
2009d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20107fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2011b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2012de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2013d402145bSBarry Smith        0,0,
2014b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2015b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2016af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20177e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2018ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20193b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20203b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
202192c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
202292c4ed94SBarry Smith        0,0,0,0,0,0,
202392c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20242593348eSBarry Smith 
20255615d1e5SSatish Balay #undef __FUNC__
20265615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20272593348eSBarry Smith /*@C
202844cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
202944cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
203044cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20312bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20322bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20332593348eSBarry Smith 
20342593348eSBarry Smith    Input Parameters:
2035029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2036b6490206SBarry Smith .  bs - size of block
20372593348eSBarry Smith .  m - number of rows
20382593348eSBarry Smith .  n - number of columns
2039b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20402bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20412bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20422593348eSBarry Smith 
20432593348eSBarry Smith    Output Parameter:
20442593348eSBarry Smith .  A - the matrix
20452593348eSBarry Smith 
20460513a670SBarry Smith    Options Database Keys:
20470513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20480513a670SBarry Smith $                     block calculations (much solver)
20490513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20500513a670SBarry Smith 
20512593348eSBarry Smith    Notes:
205244cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20532593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
205444cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20552593348eSBarry Smith 
20562593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20572593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20582593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20596da5968aSLois Curfman McInnes    matrices.
20602593348eSBarry Smith 
206144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20622593348eSBarry Smith @*/
2063b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20642593348eSBarry Smith {
20652593348eSBarry Smith   Mat         B;
2066b6490206SBarry Smith   Mat_SeqBAIJ *b;
20673b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20683b2fbd54SBarry Smith 
20693b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2070e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2071b6490206SBarry Smith 
20720513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20730513a670SBarry Smith 
2074f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2075e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
20762593348eSBarry Smith 
20772593348eSBarry Smith   *A = 0;
2078*f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
20792593348eSBarry Smith   PLogObjectCreate(B);
2080b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
208144cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
20822593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
20831a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
20841a6a6d98SBarry Smith   if (!flg) {
20857fc0212eSBarry Smith     switch (bs) {
20867fc0212eSBarry Smith     case 1:
20877fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20881a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
208939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2090f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
20917fc0212eSBarry Smith       break;
20924eeb42bcSBarry Smith     case 2:
20934eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
20941a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
209539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2096f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
20976d84be18SBarry Smith       break;
2098f327dd97SBarry Smith     case 3:
2099f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21001a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
210139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2102f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21034eeb42bcSBarry Smith       break;
21046d84be18SBarry Smith     case 4:
21056d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21061a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
210739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2108f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21096d84be18SBarry Smith       break;
21106d84be18SBarry Smith     case 5:
21116d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21121a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
211339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2114f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21156d84be18SBarry Smith       break;
211648e9ddb2SSatish Balay     case 7:
211748e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
211848e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
211948e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
212048e9ddb2SSatish Balay       break;
21217fc0212eSBarry Smith     }
21221a6a6d98SBarry Smith   }
2123b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2124b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21252593348eSBarry Smith   B->factor           = 0;
21262593348eSBarry Smith   B->lupivotthreshold = 1.0;
212790f02eecSBarry Smith   B->mapping          = 0;
21282593348eSBarry Smith   b->row              = 0;
21292593348eSBarry Smith   b->col              = 0;
21302593348eSBarry Smith   b->reallocs         = 0;
21312593348eSBarry Smith 
213244cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
213344cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2134b6490206SBarry Smith   b->mbs     = mbs;
2135f2501298SSatish Balay   b->nbs     = nbs;
2136b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21372593348eSBarry Smith   if (nnz == PETSC_NULL) {
2138119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21392593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2140b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2141b6490206SBarry Smith     nz = nz*mbs;
21422593348eSBarry Smith   }
21432593348eSBarry Smith   else {
21442593348eSBarry Smith     nz = 0;
2145b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21462593348eSBarry Smith   }
21472593348eSBarry Smith 
21482593348eSBarry Smith   /* allocate the matrix space */
21497e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21502593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21517e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21527e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21532593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21542593348eSBarry Smith   b->i  = b->j + nz;
21552593348eSBarry Smith   b->singlemalloc = 1;
21562593348eSBarry Smith 
2157de6a44a3SBarry Smith   b->i[0] = 0;
2158b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21592593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21602593348eSBarry Smith   }
21612593348eSBarry Smith 
2162b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2163b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2164*f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2165b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21662593348eSBarry Smith 
2167b6490206SBarry Smith   b->bs               = bs;
21689df24120SSatish Balay   b->bs2              = bs2;
2169b6490206SBarry Smith   b->mbs              = mbs;
21702593348eSBarry Smith   b->nz               = 0;
217118e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21722593348eSBarry Smith   b->sorted           = 0;
21732593348eSBarry Smith   b->roworiented      = 1;
21742593348eSBarry Smith   b->nonew            = 0;
21752593348eSBarry Smith   b->diag             = 0;
21762593348eSBarry Smith   b->solve_work       = 0;
2177de6a44a3SBarry Smith   b->mult_work        = 0;
21782593348eSBarry Smith   b->spptr            = 0;
21794e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
21804e220ebcSLois Curfman McInnes 
21812593348eSBarry Smith   *A = B;
21822593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
21832593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
21842593348eSBarry Smith   return 0;
21852593348eSBarry Smith }
21862593348eSBarry Smith 
21875615d1e5SSatish Balay #undef __FUNC__
21885615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2189b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
21902593348eSBarry Smith {
21912593348eSBarry Smith   Mat         C;
2192b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
21939df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2194de6a44a3SBarry Smith 
2195e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
21962593348eSBarry Smith 
21972593348eSBarry Smith   *B = 0;
2198*f09e8eb9SSatish Balay   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
21992593348eSBarry Smith   PLogObjectCreate(C);
2200b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22012593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2202b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2203b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22042593348eSBarry Smith   C->factor     = A->factor;
22052593348eSBarry Smith   c->row        = 0;
22062593348eSBarry Smith   c->col        = 0;
22072593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22082593348eSBarry Smith 
220944cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
221044cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
221144cd7ae7SLois Curfman McInnes   C->M          = a->m;
221244cd7ae7SLois Curfman McInnes   C->N          = a->n;
221344cd7ae7SLois Curfman McInnes 
2214b6490206SBarry Smith   c->bs         = a->bs;
22159df24120SSatish Balay   c->bs2        = a->bs2;
2216b6490206SBarry Smith   c->mbs        = a->mbs;
2217b6490206SBarry Smith   c->nbs        = a->nbs;
22182593348eSBarry Smith 
2219b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2220b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2221b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22222593348eSBarry Smith     c->imax[i] = a->imax[i];
22232593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22242593348eSBarry Smith   }
22252593348eSBarry Smith 
22262593348eSBarry Smith   /* allocate the matrix space */
22272593348eSBarry Smith   c->singlemalloc = 1;
22287e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22292593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22307e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2231de6a44a3SBarry Smith   c->i  = c->j + nz;
2232b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2233b6490206SBarry Smith   if (mbs > 0) {
2234de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22352593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22367e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22372593348eSBarry Smith     }
22382593348eSBarry Smith   }
22392593348eSBarry Smith 
2240*f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
22412593348eSBarry Smith   c->sorted      = a->sorted;
22422593348eSBarry Smith   c->roworiented = a->roworiented;
22432593348eSBarry Smith   c->nonew       = a->nonew;
22442593348eSBarry Smith 
22452593348eSBarry Smith   if (a->diag) {
2246b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2247b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2248b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22492593348eSBarry Smith       c->diag[i] = a->diag[i];
22502593348eSBarry Smith     }
22512593348eSBarry Smith   }
22522593348eSBarry Smith   else c->diag          = 0;
22532593348eSBarry Smith   c->nz                 = a->nz;
22542593348eSBarry Smith   c->maxnz              = a->maxnz;
22552593348eSBarry Smith   c->solve_work         = 0;
22562593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22577fc0212eSBarry Smith   c->mult_work          = 0;
22582593348eSBarry Smith   *B = C;
22592593348eSBarry Smith   return 0;
22602593348eSBarry Smith }
22612593348eSBarry Smith 
22625615d1e5SSatish Balay #undef __FUNC__
22635615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
226419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22652593348eSBarry Smith {
2266b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22672593348eSBarry Smith   Mat          B;
2268de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2269b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
227035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2271a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2272b6490206SBarry Smith   Scalar       *aa;
227319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22742593348eSBarry Smith 
22751a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2276de6a44a3SBarry Smith   bs2  = bs*bs;
2277b6490206SBarry Smith 
22782593348eSBarry Smith   MPI_Comm_size(comm,&size);
2279e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
228090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
228177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2282e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
22832593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
22842593348eSBarry Smith 
2285e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
228635aab85fSBarry Smith 
228735aab85fSBarry Smith   /*
228835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
228935aab85fSBarry Smith     divisible by the blocksize
229035aab85fSBarry Smith   */
2291b6490206SBarry Smith   mbs        = M/bs;
229235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
229335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
229435aab85fSBarry Smith   else                  mbs++;
229535aab85fSBarry Smith   if (extra_rows) {
2296537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
229735aab85fSBarry Smith   }
2298b6490206SBarry Smith 
22992593348eSBarry Smith   /* read in row lengths */
230035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
230177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
230235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23032593348eSBarry Smith 
2304b6490206SBarry Smith   /* read in column indices */
230535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
230677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
230735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2308b6490206SBarry Smith 
2309b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2310b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2311b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
231235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
231335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
231435aab85fSBarry Smith   masked = mask + mbs;
2315b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2316b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
231735aab85fSBarry Smith     nmask = 0;
2318b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2319b6490206SBarry Smith       kmax = rowlengths[rowcount];
2320b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
232135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
232235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2323b6490206SBarry Smith       }
2324b6490206SBarry Smith       rowcount++;
2325b6490206SBarry Smith     }
232635aab85fSBarry Smith     browlengths[i] += nmask;
232735aab85fSBarry Smith     /* zero out the mask elements we set */
232835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2329b6490206SBarry Smith   }
2330b6490206SBarry Smith 
23312593348eSBarry Smith   /* create our matrix */
233235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
233335aab85fSBarry Smith          CHKERRQ(ierr);
23342593348eSBarry Smith   B = *A;
2335b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23362593348eSBarry Smith 
23372593348eSBarry Smith   /* set matrix "i" values */
2338de6a44a3SBarry Smith   a->i[0] = 0;
2339b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2340b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2341b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23422593348eSBarry Smith   }
2343b6490206SBarry Smith   a->nz         = 0;
2344b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23452593348eSBarry Smith 
2346b6490206SBarry Smith   /* read in nonzero values */
234735aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
234877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
234935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2350b6490206SBarry Smith 
2351b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2352b6490206SBarry Smith   nzcount = 0; jcount = 0;
2353b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2354b6490206SBarry Smith     nzcountb = nzcount;
235535aab85fSBarry Smith     nmask    = 0;
2356b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2357b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2358b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
235935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
236035aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2361b6490206SBarry Smith       }
2362b6490206SBarry Smith       rowcount++;
2363b6490206SBarry Smith     }
2364de6a44a3SBarry Smith     /* sort the masked values */
236577c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2366de6a44a3SBarry Smith 
2367b6490206SBarry Smith     /* set "j" values into matrix */
2368b6490206SBarry Smith     maskcount = 1;
236935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
237035aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2371de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2372b6490206SBarry Smith     }
2373b6490206SBarry Smith     /* set "a" values into matrix */
2374de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2375b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2376b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2377b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2378de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2379de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2380de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2381de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2382b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2383b6490206SBarry Smith       }
2384b6490206SBarry Smith     }
238535aab85fSBarry Smith     /* zero out the mask elements we set */
238635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2387b6490206SBarry Smith   }
2388e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2389b6490206SBarry Smith 
2390b6490206SBarry Smith   PetscFree(rowlengths);
2391b6490206SBarry Smith   PetscFree(browlengths);
2392b6490206SBarry Smith   PetscFree(aa);
2393b6490206SBarry Smith   PetscFree(jj);
2394b6490206SBarry Smith   PetscFree(mask);
2395b6490206SBarry Smith 
2396b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2397de6a44a3SBarry Smith 
2398de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2399de6a44a3SBarry Smith   if (flg) {
240019bcc07fSBarry Smith     Viewer tviewer;
240119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2402639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
240319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
240419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2405de6a44a3SBarry Smith   }
2406de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2407de6a44a3SBarry Smith   if (flg) {
240819bcc07fSBarry Smith     Viewer tviewer;
240919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2410639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
241119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
241219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2413de6a44a3SBarry Smith   }
2414de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2415de6a44a3SBarry Smith   if (flg) {
241619bcc07fSBarry Smith     Viewer tviewer;
241719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
241819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
241919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2420de6a44a3SBarry Smith   }
2421de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2422de6a44a3SBarry Smith   if (flg) {
242319bcc07fSBarry Smith     Viewer tviewer;
242419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2425639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
242619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
242719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2428de6a44a3SBarry Smith   }
2429de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2430de6a44a3SBarry Smith   if (flg) {
243119bcc07fSBarry Smith     Viewer tviewer;
243219bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
243319bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
243419bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
243519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2436de6a44a3SBarry Smith   }
24372593348eSBarry Smith   return 0;
24382593348eSBarry Smith }
24392593348eSBarry Smith 
24402593348eSBarry Smith 
24412593348eSBarry Smith 
2442