xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 92c4ed94046817a2881466ca8f206a5effe0c57b)
12593348eSBarry Smith #ifndef lint
2*92c4ed94SBarry Smith static char vcid[] = "$Id: baij.c,v 1.86 1997/01/29 19:45:09 balay Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
195615d1e5SSatish Balay #undef __FUNC__
205615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
22de6a44a3SBarry Smith {
23de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
247fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
25de6a44a3SBarry Smith 
26de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
287fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
29de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30de6a44a3SBarry Smith       if (a->j[j] == i) {
31de6a44a3SBarry Smith         diag[i] = j;
32de6a44a3SBarry Smith         break;
33de6a44a3SBarry Smith       }
34de6a44a3SBarry Smith     }
35de6a44a3SBarry Smith   }
36de6a44a3SBarry Smith   a->diag = diag;
37de6a44a3SBarry Smith   return 0;
38de6a44a3SBarry Smith }
392593348eSBarry Smith 
402593348eSBarry Smith #include "draw.h"
412593348eSBarry Smith #include "pinclude/pviewer.h"
4277c4ece6SBarry Smith #include "sys.h"
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
1197e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
1347e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
2175615d1e5SSatish Balay #undef __FUNC__
2185615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw"
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
3445615d1e5SSatish Balay #undef __FUNC__
3455615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ"
346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
3705615d1e5SSatish Balay #undef __FUNC__
3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417cd0e1443SSatish Balay           goto noinsert;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420cd0e1443SSatish Balay       if (nonew) goto noinsert;
421cd0e1443SSatish Balay       if (nrow >= rmax) {
422cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
423cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424cd0e1443SSatish Balay         Scalar *new_a;
425cd0e1443SSatish Balay 
426cd0e1443SSatish Balay         /* malloc new storage space */
4277e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4297e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
430cd0e1443SSatish Balay         new_i   = new_j + new_nz;
431cd0e1443SSatish Balay 
432cd0e1443SSatish Balay         /* copy over old data into new slots */
433cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4347e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438cd0e1443SSatish Balay                                                            len*sizeof(int));
439a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443cd0e1443SSatish Balay         /* free up old matrix storage */
444cd0e1443SSatish Balay         PetscFree(a->a);
445cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447cd0e1443SSatish Balay         a->singlemalloc = 1;
448cd0e1443SSatish Balay 
449a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4517e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
453cd0e1443SSatish Balay         a->reallocs++;
454119a934aSSatish Balay         a->nz++;
455cd0e1443SSatish Balay       }
4567e67e3f9SSatish Balay       N = nrow++ - 1;
457cd0e1443SSatish Balay       /* shift up all the later entries in this row */
458cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
459cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4607e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461cd0e1443SSatish Balay       }
4627e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463cd0e1443SSatish Balay       rp[i]                      = bcol;
4647e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
465cd0e1443SSatish Balay       noinsert:;
466cd0e1443SSatish Balay       low = i;
467cd0e1443SSatish Balay     }
468cd0e1443SSatish Balay     ailen[brow] = nrow;
469cd0e1443SSatish Balay   }
470cd0e1443SSatish Balay   return 0;
471cd0e1443SSatish Balay }
472cd0e1443SSatish Balay 
4735615d1e5SSatish Balay #undef __FUNC__
474*92c4ed94SBarry Smith #define __FUNC__ "MatSetValues_SeqBAIJ"
475*92c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
476*92c4ed94SBarry Smith {
477*92c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478*92c4ed94SBarry Smith   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
479*92c4ed94SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
480*92c4ed94SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs=a->bs;
481*92c4ed94SBarry Smith   int          ridx,cidx,bs2=a->bs2;
482*92c4ed94SBarry Smith   Scalar      *ap,value,*aa=a->a,*bap;
483*92c4ed94SBarry Smith 
484*92c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
485*92c4ed94SBarry Smith     row  = im[k];
486*92c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
487*92c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
488*92c4ed94SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
489*92c4ed94SBarry Smith #endif
490*92c4ed94SBarry Smith     rp   = aj + ai[row];
491*92c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
492*92c4ed94SBarry Smith     rmax = imax[row];
493*92c4ed94SBarry Smith     nrow = ailen[row];
494*92c4ed94SBarry Smith     low  = 0;
495*92c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
496*92c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
497*92c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
498*92c4ed94SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
499*92c4ed94SBarry Smith #endif
500*92c4ed94SBarry Smith       col = in[l];
501*92c4ed94SBarry Smith       ridx = row % bs; cidx = col % bs;
502*92c4ed94SBarry Smith       if (roworiented) {
503*92c4ed94SBarry Smith         value = *v++;
504*92c4ed94SBarry Smith       }
505*92c4ed94SBarry Smith       else {
506*92c4ed94SBarry Smith         value = v[k + l*m];
507*92c4ed94SBarry Smith       }
508*92c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
509*92c4ed94SBarry Smith       while (high-low > 7) {
510*92c4ed94SBarry Smith         t = (low+high)/2;
511*92c4ed94SBarry Smith         if (rp[t] > col) high = t;
512*92c4ed94SBarry Smith         else             low  = t;
513*92c4ed94SBarry Smith       }
514*92c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
515*92c4ed94SBarry Smith         if (rp[i] > col) break;
516*92c4ed94SBarry Smith         if (rp[i] == col) {
517*92c4ed94SBarry Smith           bap  = ap +  bs2*i + bs*cidx + ridx;
518*92c4ed94SBarry Smith           if (is == ADD_VALUES) *bap += value;
519*92c4ed94SBarry Smith           else                  *bap  = value;
520*92c4ed94SBarry Smith           goto noinsert;
521*92c4ed94SBarry Smith         }
522*92c4ed94SBarry Smith       }
523*92c4ed94SBarry Smith       if (nonew) goto noinsert;
524*92c4ed94SBarry Smith       if (nrow >= rmax) {
525*92c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
526*92c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
527*92c4ed94SBarry Smith         Scalar *new_a;
528*92c4ed94SBarry Smith 
529*92c4ed94SBarry Smith         /* malloc new storage space */
530*92c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
531*92c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
532*92c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
533*92c4ed94SBarry Smith         new_i   = new_j + new_nz;
534*92c4ed94SBarry Smith 
535*92c4ed94SBarry Smith         /* copy over old data into new slots */
536*92c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
537*92c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
538*92c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
539*92c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
540*92c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
541*92c4ed94SBarry Smith                                                            len*sizeof(int));
542*92c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
543*92c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
544*92c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
545*92c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
546*92c4ed94SBarry Smith         /* free up old matrix storage */
547*92c4ed94SBarry Smith         PetscFree(a->a);
548*92c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
549*92c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
550*92c4ed94SBarry Smith         a->singlemalloc = 1;
551*92c4ed94SBarry Smith 
552*92c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
553*92c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
554*92c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
555*92c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
556*92c4ed94SBarry Smith         a->reallocs++;
557*92c4ed94SBarry Smith         a->nz++;
558*92c4ed94SBarry Smith       }
559*92c4ed94SBarry Smith       N = nrow++ - 1;
560*92c4ed94SBarry Smith       /* shift up all the later entries in this row */
561*92c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
562*92c4ed94SBarry Smith         rp[ii+1] = rp[ii];
563*92c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
564*92c4ed94SBarry Smith       }
565*92c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
566*92c4ed94SBarry Smith       rp[i]                      = col;
567*92c4ed94SBarry Smith       ap[bs2*i + bs*cidx + ridx] = value;
568*92c4ed94SBarry Smith       noinsert:;
569*92c4ed94SBarry Smith       low = i;
570*92c4ed94SBarry Smith     }
571*92c4ed94SBarry Smith     ailen[row] = nrow;
572*92c4ed94SBarry Smith   }
573*92c4ed94SBarry Smith   return 0;
574*92c4ed94SBarry Smith }
575*92c4ed94SBarry Smith 
576*92c4ed94SBarry Smith #undef __FUNC__
5775615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ"
5780b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
5790b824a48SSatish Balay {
5800b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5810b824a48SSatish Balay   *m = a->m; *n = a->n;
5820b824a48SSatish Balay   return 0;
5830b824a48SSatish Balay }
5840b824a48SSatish Balay 
5855615d1e5SSatish Balay #undef __FUNC__
5865615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
5870b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
5880b824a48SSatish Balay {
5890b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5900b824a48SSatish Balay   *m = 0; *n = a->m;
5910b824a48SSatish Balay   return 0;
5920b824a48SSatish Balay }
5930b824a48SSatish Balay 
5945615d1e5SSatish Balay #undef __FUNC__
5955615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
5969867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5979867e207SSatish Balay {
5989867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5997e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6009867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6019867e207SSatish Balay 
6029867e207SSatish Balay   bs  = a->bs;
6039867e207SSatish Balay   ai  = a->i;
6049867e207SSatish Balay   aj  = a->j;
6059867e207SSatish Balay   aa  = a->a;
6069df24120SSatish Balay   bs2 = a->bs2;
6079867e207SSatish Balay 
608e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6099867e207SSatish Balay 
6109867e207SSatish Balay   bn  = row/bs;   /* Block number */
6119867e207SSatish Balay   bp  = row % bs; /* Block Position */
6129867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6139867e207SSatish Balay   *nz = bs*M;
6149867e207SSatish Balay 
6159867e207SSatish Balay   if (v) {
6169867e207SSatish Balay     *v = 0;
6179867e207SSatish Balay     if (*nz) {
6189867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6199867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6209867e207SSatish Balay         v_i  = *v + i*bs;
6217e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6227e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6239867e207SSatish Balay       }
6249867e207SSatish Balay     }
6259867e207SSatish Balay   }
6269867e207SSatish Balay 
6279867e207SSatish Balay   if (idx) {
6289867e207SSatish Balay     *idx = 0;
6299867e207SSatish Balay     if (*nz) {
6309867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6319867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6329867e207SSatish Balay         idx_i = *idx + i*bs;
6339867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6349867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6359867e207SSatish Balay       }
6369867e207SSatish Balay     }
6379867e207SSatish Balay   }
6389867e207SSatish Balay   return 0;
6399867e207SSatish Balay }
6409867e207SSatish Balay 
6415615d1e5SSatish Balay #undef __FUNC__
6425615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6439867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6449867e207SSatish Balay {
6459867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6469867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6479867e207SSatish Balay   return 0;
6489867e207SSatish Balay }
649b6490206SBarry Smith 
6505615d1e5SSatish Balay #undef __FUNC__
6515615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
652f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
653f2501298SSatish Balay {
654f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
655f2501298SSatish Balay   Mat         C;
656f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
6579df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
658f2501298SSatish Balay   Scalar      *array=a->a;
659f2501298SSatish Balay 
660f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
661e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
662f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
663f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
664a7c10996SSatish Balay 
665a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
666f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
667f2501298SSatish Balay   PetscFree(col);
668f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
669f2501298SSatish Balay   cols = rows + bs;
670f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
671f2501298SSatish Balay     cols[0] = i*bs;
672f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
673f2501298SSatish Balay     len = ai[i+1] - ai[i];
674f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
675f2501298SSatish Balay       rows[0] = (*aj++)*bs;
676f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
677f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
678f2501298SSatish Balay       array += bs2;
679f2501298SSatish Balay     }
680f2501298SSatish Balay   }
6811073c447SSatish Balay   PetscFree(rows);
682f2501298SSatish Balay 
6836d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6846d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
685f2501298SSatish Balay 
686f2501298SSatish Balay   if (B != PETSC_NULL) {
687f2501298SSatish Balay     *B = C;
688f2501298SSatish Balay   } else {
689f2501298SSatish Balay     /* This isn't really an in-place transpose */
690f2501298SSatish Balay     PetscFree(a->a);
691f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
692f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
693f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
694f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
695f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
696f2501298SSatish Balay     PetscFree(a);
697f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
698f2501298SSatish Balay     PetscHeaderDestroy(C);
699f2501298SSatish Balay   }
700f2501298SSatish Balay   return 0;
701f2501298SSatish Balay }
702f2501298SSatish Balay 
703f2501298SSatish Balay 
7045615d1e5SSatish Balay #undef __FUNC__
7055615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
706584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
707584200bdSSatish Balay {
708584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
709584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
710a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
711d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
712584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
713584200bdSSatish Balay 
7146d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
715584200bdSSatish Balay 
716d402145bSBarry Smith   rmax = ailen[0];
717584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
718584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
719584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
720d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
721584200bdSSatish Balay     if (fshift) {
722a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
723584200bdSSatish Balay       N = ailen[i];
724584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
725584200bdSSatish Balay         ip[j-fshift] = ip[j];
7267e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
727584200bdSSatish Balay       }
728584200bdSSatish Balay     }
729584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
730584200bdSSatish Balay   }
731584200bdSSatish Balay   if (mbs) {
732584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
733584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
734584200bdSSatish Balay   }
735584200bdSSatish Balay   /* reset ilen and imax for each row */
736584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
737584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
738584200bdSSatish Balay   }
739a7c10996SSatish Balay   a->nz = ai[mbs];
740584200bdSSatish Balay 
741584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
742584200bdSSatish Balay   if (fshift && a->diag) {
743584200bdSSatish Balay     PetscFree(a->diag);
744584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
745584200bdSSatish Balay     a->diag = 0;
746584200bdSSatish Balay   }
7473d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
748219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7493d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
750584200bdSSatish Balay            a->reallocs);
751d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
752e2f3b5e9SSatish Balay   a->reallocs          = 0;
7534e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7544e220ebcSLois Curfman McInnes 
755584200bdSSatish Balay   return 0;
756584200bdSSatish Balay }
757584200bdSSatish Balay 
7585615d1e5SSatish Balay #undef __FUNC__
7595615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
760b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
7612593348eSBarry Smith {
762b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7639df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
7642593348eSBarry Smith   return 0;
7652593348eSBarry Smith }
7662593348eSBarry Smith 
7675615d1e5SSatish Balay #undef __FUNC__
7685615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ"
769b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
7702593348eSBarry Smith {
7712593348eSBarry Smith   Mat         A  = (Mat) obj;
772b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77390f02eecSBarry Smith   int         ierr;
7742593348eSBarry Smith 
7752593348eSBarry Smith #if defined(PETSC_LOG)
7762593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
7772593348eSBarry Smith #endif
7782593348eSBarry Smith   PetscFree(a->a);
7792593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
7802593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
7812593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
7822593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
7832593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
784de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
7852593348eSBarry Smith   PetscFree(a);
78690f02eecSBarry Smith   if (A->mapping) {
78790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
78890f02eecSBarry Smith   }
789f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
790f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
7912593348eSBarry Smith   return 0;
7922593348eSBarry Smith }
7932593348eSBarry Smith 
7945615d1e5SSatish Balay #undef __FUNC__
7955615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ"
796b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
7972593348eSBarry Smith {
798b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7996d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
8006d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
8016d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
802219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
8036d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
8046d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
8056d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
806219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8076d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8086d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
80990f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
81090f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
81194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8126d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
813e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8142593348eSBarry Smith   else
815e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8162593348eSBarry Smith   return 0;
8172593348eSBarry Smith }
8182593348eSBarry Smith 
8192593348eSBarry Smith 
8202593348eSBarry Smith /* -------------------------------------------------------*/
8212593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8222593348eSBarry Smith /* -------------------------------------------------------*/
823b6490206SBarry Smith #include "pinclude/plapack.h"
824b6490206SBarry Smith 
8255615d1e5SSatish Balay #undef __FUNC__
8265615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
82739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8282593348eSBarry Smith {
829b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
83039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
831c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8322593348eSBarry Smith 
833c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
834c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
835b6490206SBarry Smith 
836119a934aSSatish Balay   idx   = a->j;
837119a934aSSatish Balay   v     = a->a;
838119a934aSSatish Balay   ii    = a->i;
839119a934aSSatish Balay 
840119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
841119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
842119a934aSSatish Balay     sum  = 0.0;
843119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8441a6a6d98SBarry Smith     z[i] = sum;
845119a934aSSatish Balay   }
846c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
847c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
84839b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
84939b95ed1SBarry Smith   return 0;
85039b95ed1SBarry Smith }
85139b95ed1SBarry Smith 
8525615d1e5SSatish Balay #undef __FUNC__
8535615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
85439b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
85539b95ed1SBarry Smith {
85639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
85739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
85839b95ed1SBarry Smith   register Scalar x1,x2;
85939b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
860c16cb8f2SBarry Smith   int             j,n;
86139b95ed1SBarry Smith 
862c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
863c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
86439b95ed1SBarry Smith 
86539b95ed1SBarry Smith   idx   = a->j;
86639b95ed1SBarry Smith   v     = a->a;
86739b95ed1SBarry Smith   ii    = a->i;
86839b95ed1SBarry Smith 
869119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
870119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
871119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
872119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
873119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
874119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
875119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
876119a934aSSatish Balay       v += 4;
877119a934aSSatish Balay     }
8781a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
879119a934aSSatish Balay     z += 2;
880119a934aSSatish Balay   }
881c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
882c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
88339b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
88439b95ed1SBarry Smith   return 0;
88539b95ed1SBarry Smith }
88639b95ed1SBarry Smith 
8875615d1e5SSatish Balay #undef __FUNC__
8885615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
88939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
89039b95ed1SBarry Smith {
89139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
89239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
893c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
89439b95ed1SBarry Smith 
895c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
896c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
89739b95ed1SBarry Smith 
89839b95ed1SBarry Smith   idx   = a->j;
89939b95ed1SBarry Smith   v     = a->a;
90039b95ed1SBarry Smith   ii    = a->i;
90139b95ed1SBarry Smith 
902119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
903119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
904119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
905119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
906119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
907119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
908119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
909119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
910119a934aSSatish Balay       v += 9;
911119a934aSSatish Balay     }
9121a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
913119a934aSSatish Balay     z += 3;
914119a934aSSatish Balay   }
915c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
916c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
91739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
91839b95ed1SBarry Smith   return 0;
91939b95ed1SBarry Smith }
92039b95ed1SBarry Smith 
9215615d1e5SSatish Balay #undef __FUNC__
9225615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
92339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
92439b95ed1SBarry Smith {
92539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
92639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
92739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
92839b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
929c16cb8f2SBarry Smith   int             j,n;
93039b95ed1SBarry Smith 
931c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
932c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
93339b95ed1SBarry Smith 
93439b95ed1SBarry Smith   idx   = a->j;
93539b95ed1SBarry Smith   v     = a->a;
93639b95ed1SBarry Smith   ii    = a->i;
93739b95ed1SBarry Smith 
938119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
939119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
940119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
941119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
942119a934aSSatish Balay       xb = x + 4*(*idx++);
943119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
944119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
945119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
946119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
947119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
948119a934aSSatish Balay       v += 16;
949119a934aSSatish Balay     }
9501a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
951119a934aSSatish Balay     z += 4;
952119a934aSSatish Balay   }
953c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
954c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
95539b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
95639b95ed1SBarry Smith   return 0;
95739b95ed1SBarry Smith }
95839b95ed1SBarry Smith 
9595615d1e5SSatish Balay #undef __FUNC__
9605615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
96139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
96239b95ed1SBarry Smith {
96339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
96439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
96539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
966c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
96739b95ed1SBarry Smith 
968c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
969c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
97039b95ed1SBarry Smith 
97139b95ed1SBarry Smith   idx   = a->j;
97239b95ed1SBarry Smith   v     = a->a;
97339b95ed1SBarry Smith   ii    = a->i;
97439b95ed1SBarry Smith 
975119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
976119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
977119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
978119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
979119a934aSSatish Balay       xb = x + 5*(*idx++);
980119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
981119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
982119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
983119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
984119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
985119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
986119a934aSSatish Balay       v += 25;
987119a934aSSatish Balay     }
9881a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
989119a934aSSatish Balay     z += 5;
990119a934aSSatish Balay   }
991c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
992c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
99339b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
99439b95ed1SBarry Smith   return 0;
99539b95ed1SBarry Smith }
99639b95ed1SBarry Smith 
9975615d1e5SSatish Balay #undef __FUNC__
9985615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
99939b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
100039b95ed1SBarry Smith {
100139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
100239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1003c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1004c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1005c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
100639b95ed1SBarry Smith 
1007c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1008c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
100939b95ed1SBarry Smith 
101039b95ed1SBarry Smith   idx   = a->j;
101139b95ed1SBarry Smith   v     = a->a;
101239b95ed1SBarry Smith   ii    = a->i;
101339b95ed1SBarry Smith 
101439b95ed1SBarry Smith 
1015119a934aSSatish Balay   if (!a->mult_work) {
10163b547af2SSatish Balay     k = PetscMax(a->m,a->n);
10172b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1018119a934aSSatish Balay   }
1019119a934aSSatish Balay   work = a->mult_work;
1020119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1021119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1022119a934aSSatish Balay     ncols = n*bs;
1023119a934aSSatish Balay     workt = work;
1024119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1025119a934aSSatish Balay       xb = x + bs*(*idx++);
1026119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1027119a934aSSatish Balay       workt += bs;
1028119a934aSSatish Balay     }
10291a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1030119a934aSSatish Balay     v += n*bs2;
1031119a934aSSatish Balay     z += bs;
1032119a934aSSatish Balay   }
1033c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1034c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
10351a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1036bb42667fSSatish Balay   return 0;
1037bb42667fSSatish Balay }
1038bb42667fSSatish Balay 
10395615d1e5SSatish Balay #undef __FUNC__
10405615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1041f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1042f44d3a6dSSatish Balay {
1043f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1044f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1045c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1046f44d3a6dSSatish Balay 
1047c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1048c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1049c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1050f44d3a6dSSatish Balay 
1051f44d3a6dSSatish Balay   idx   = a->j;
1052f44d3a6dSSatish Balay   v     = a->a;
1053f44d3a6dSSatish Balay   ii    = a->i;
1054f44d3a6dSSatish Balay 
1055f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1056f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1057f44d3a6dSSatish Balay     sum  = y[i];
1058f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1059f44d3a6dSSatish Balay     z[i] = sum;
1060f44d3a6dSSatish Balay   }
1061c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1062c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1063c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1064f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1065f44d3a6dSSatish Balay   return 0;
1066f44d3a6dSSatish Balay }
1067f44d3a6dSSatish Balay 
10685615d1e5SSatish Balay #undef __FUNC__
10695615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1070f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1071f44d3a6dSSatish Balay {
1072f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1073f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1074f44d3a6dSSatish Balay   register Scalar x1,x2;
1075f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1076c16cb8f2SBarry Smith   int             j,n;
1077f44d3a6dSSatish Balay 
1078c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1079c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1080c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1081f44d3a6dSSatish Balay 
1082f44d3a6dSSatish Balay   idx   = a->j;
1083f44d3a6dSSatish Balay   v     = a->a;
1084f44d3a6dSSatish Balay   ii    = a->i;
1085f44d3a6dSSatish Balay 
1086f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1087f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1088f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1089f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1090f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1091f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1092f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1093f44d3a6dSSatish Balay       v += 4;
1094f44d3a6dSSatish Balay     }
1095f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1096f44d3a6dSSatish Balay     z += 2; y += 2;
1097f44d3a6dSSatish Balay   }
1098c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1099c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1100c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1101f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1102f44d3a6dSSatish Balay   return 0;
1103f44d3a6dSSatish Balay }
1104f44d3a6dSSatish Balay 
11055615d1e5SSatish Balay #undef __FUNC__
11065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1107f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1108f44d3a6dSSatish Balay {
1109f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1110f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1111c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1112f44d3a6dSSatish Balay 
1113c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1114c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1115c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1116f44d3a6dSSatish Balay 
1117f44d3a6dSSatish Balay   idx   = a->j;
1118f44d3a6dSSatish Balay   v     = a->a;
1119f44d3a6dSSatish Balay   ii    = a->i;
1120f44d3a6dSSatish Balay 
1121f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1122f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1123f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1124f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1125f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1126f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1127f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1128f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1129f44d3a6dSSatish Balay       v += 9;
1130f44d3a6dSSatish Balay     }
1131f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1132f44d3a6dSSatish Balay     z += 3; y += 3;
1133f44d3a6dSSatish Balay   }
1134c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1135c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1136c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1137f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1138f44d3a6dSSatish Balay   return 0;
1139f44d3a6dSSatish Balay }
1140f44d3a6dSSatish Balay 
11415615d1e5SSatish Balay #undef __FUNC__
11425615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1143f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1144f44d3a6dSSatish Balay {
1145f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1146f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1147f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1148f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1149c16cb8f2SBarry Smith   int             j,n;
1150f44d3a6dSSatish Balay 
1151c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1152c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1153c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1154f44d3a6dSSatish Balay 
1155f44d3a6dSSatish Balay   idx   = a->j;
1156f44d3a6dSSatish Balay   v     = a->a;
1157f44d3a6dSSatish Balay   ii    = a->i;
1158f44d3a6dSSatish Balay 
1159f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1160f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1161f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1162f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1163f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1164f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1165f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1166f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1167f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1168f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1169f44d3a6dSSatish Balay       v += 16;
1170f44d3a6dSSatish Balay     }
1171f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1172f44d3a6dSSatish Balay     z += 4; y += 4;
1173f44d3a6dSSatish Balay   }
1174c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1175c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1176c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1177f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1178f44d3a6dSSatish Balay   return 0;
1179f44d3a6dSSatish Balay }
1180f44d3a6dSSatish Balay 
11815615d1e5SSatish Balay #undef __FUNC__
11825615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1183f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1184f44d3a6dSSatish Balay {
1185f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1186f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1187f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1188c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1189f44d3a6dSSatish Balay 
1190c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1191c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1192c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1193f44d3a6dSSatish Balay 
1194f44d3a6dSSatish Balay   idx   = a->j;
1195f44d3a6dSSatish Balay   v     = a->a;
1196f44d3a6dSSatish Balay   ii    = a->i;
1197f44d3a6dSSatish Balay 
1198f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1199f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1200f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1201f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1202f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1203f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1204f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1205f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1206f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1207f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1208f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1209f44d3a6dSSatish Balay       v += 25;
1210f44d3a6dSSatish Balay     }
1211f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1212f44d3a6dSSatish Balay     z += 5; y += 5;
1213f44d3a6dSSatish Balay   }
1214c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1215c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1216c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1217f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1218f44d3a6dSSatish Balay   return 0;
1219f44d3a6dSSatish Balay }
1220f44d3a6dSSatish Balay 
12215615d1e5SSatish Balay #undef __FUNC__
12225615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1223f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1224f44d3a6dSSatish Balay {
1225f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1226f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1227f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1228f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1229f44d3a6dSSatish Balay 
1230f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1231f44d3a6dSSatish Balay 
1232c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1233c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1234f44d3a6dSSatish Balay 
1235f44d3a6dSSatish Balay   idx   = a->j;
1236f44d3a6dSSatish Balay   v     = a->a;
1237f44d3a6dSSatish Balay   ii    = a->i;
1238f44d3a6dSSatish Balay 
1239f44d3a6dSSatish Balay 
1240f44d3a6dSSatish Balay   if (!a->mult_work) {
1241f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1242f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1243f44d3a6dSSatish Balay   }
1244f44d3a6dSSatish Balay   work = a->mult_work;
1245f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1246f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1247f44d3a6dSSatish Balay     ncols = n*bs;
1248f44d3a6dSSatish Balay     workt = work;
1249f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1250f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1251f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1252f44d3a6dSSatish Balay       workt += bs;
1253f44d3a6dSSatish Balay     }
1254f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1255f44d3a6dSSatish Balay     v += n*bs2;
1256f44d3a6dSSatish Balay     z += bs;
1257f44d3a6dSSatish Balay   }
1258c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1259c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1260f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1261f44d3a6dSSatish Balay   return 0;
1262f44d3a6dSSatish Balay }
1263f44d3a6dSSatish Balay 
12645615d1e5SSatish Balay #undef __FUNC__
12655615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
12661a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1267bb42667fSSatish Balay {
1268bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
12691a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1270bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1271bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
12729df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1273119a934aSSatish Balay 
1274119a934aSSatish Balay 
127590f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
127690f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1277bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1278bb42667fSSatish Balay 
1279119a934aSSatish Balay   idx   = a->j;
1280119a934aSSatish Balay   v     = a->a;
1281119a934aSSatish Balay   ii    = a->i;
1282119a934aSSatish Balay 
1283119a934aSSatish Balay   switch (bs) {
1284119a934aSSatish Balay   case 1:
1285119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1286119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1287119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1288119a934aSSatish Balay       ib = idx + ai[i];
1289119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1290bb1453f0SSatish Balay         rval    = ib[j];
1291bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1292119a934aSSatish Balay       }
1293119a934aSSatish Balay     }
1294119a934aSSatish Balay     break;
1295119a934aSSatish Balay   case 2:
1296119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1297119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1298119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1299119a934aSSatish Balay       ib = idx + ai[i];
1300119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1301119a934aSSatish Balay         rval      = ib[j]*2;
1302bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1303bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1304119a934aSSatish Balay         v += 4;
1305119a934aSSatish Balay       }
1306119a934aSSatish Balay     }
1307119a934aSSatish Balay     break;
1308119a934aSSatish Balay   case 3:
1309119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1310119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1311119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1312119a934aSSatish Balay       ib = idx + ai[i];
1313119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1314119a934aSSatish Balay         rval      = ib[j]*3;
1315bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1316bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1317bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1318119a934aSSatish Balay         v += 9;
1319119a934aSSatish Balay       }
1320119a934aSSatish Balay     }
1321119a934aSSatish Balay     break;
1322119a934aSSatish Balay   case 4:
1323119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1324119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1325119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1326119a934aSSatish Balay       ib = idx + ai[i];
1327119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1328119a934aSSatish Balay         rval      = ib[j]*4;
1329bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1330bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1331bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1332bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1333119a934aSSatish Balay         v += 16;
1334119a934aSSatish Balay       }
1335119a934aSSatish Balay     }
1336119a934aSSatish Balay     break;
1337119a934aSSatish Balay   case 5:
1338119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1339119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1340119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1341119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1342119a934aSSatish Balay       ib = idx + ai[i];
1343119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1344119a934aSSatish Balay         rval      = ib[j]*5;
1345bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1346bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1347bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1348bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1349bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1350119a934aSSatish Balay         v += 25;
1351119a934aSSatish Balay       }
1352119a934aSSatish Balay     }
1353119a934aSSatish Balay     break;
1354119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1355119a934aSSatish Balay     default: {
1356119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1357119a934aSSatish Balay       if (!a->mult_work) {
13583b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1359bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1360119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1361119a934aSSatish Balay       }
1362119a934aSSatish Balay       work = a->mult_work;
1363119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1364119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1365119a934aSSatish Balay         ncols = n*bs;
1366119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1367119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1368119a934aSSatish Balay         v += n*bs2;
1369119a934aSSatish Balay         x += bs;
1370119a934aSSatish Balay         workt = work;
1371119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1372119a934aSSatish Balay           zb = z + bs*(*idx++);
1373bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1374119a934aSSatish Balay           workt += bs;
1375119a934aSSatish Balay         }
1376119a934aSSatish Balay       }
1377119a934aSSatish Balay     }
1378119a934aSSatish Balay   }
13790419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
13800419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1381faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1382faf6580fSSatish Balay   return 0;
1383faf6580fSSatish Balay }
13841c351548SSatish Balay 
13855615d1e5SSatish Balay #undef __FUNC__
13865615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1387faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1388faf6580fSSatish Balay {
1389faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1390faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1391faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1392faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1393faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1394faf6580fSSatish Balay 
1395faf6580fSSatish Balay 
1396faf6580fSSatish Balay 
139790f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
139890f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1399faf6580fSSatish Balay 
1400648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1401648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1402648c1d95SSatish Balay 
1403faf6580fSSatish Balay 
1404faf6580fSSatish Balay   idx   = a->j;
1405faf6580fSSatish Balay   v     = a->a;
1406faf6580fSSatish Balay   ii    = a->i;
1407faf6580fSSatish Balay 
1408faf6580fSSatish Balay   switch (bs) {
1409faf6580fSSatish Balay   case 1:
1410faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1411faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1412faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1413faf6580fSSatish Balay       ib = idx + ai[i];
1414faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1415faf6580fSSatish Balay         rval    = ib[j];
1416faf6580fSSatish Balay         z[rval] += *v++ * x1;
1417faf6580fSSatish Balay       }
1418faf6580fSSatish Balay     }
1419faf6580fSSatish Balay     break;
1420faf6580fSSatish Balay   case 2:
1421faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1422faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1423faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1424faf6580fSSatish Balay       ib = idx + ai[i];
1425faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1426faf6580fSSatish Balay         rval      = ib[j]*2;
1427faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1428faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1429faf6580fSSatish Balay         v += 4;
1430faf6580fSSatish Balay       }
1431faf6580fSSatish Balay     }
1432faf6580fSSatish Balay     break;
1433faf6580fSSatish Balay   case 3:
1434faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1435faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1436faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1437faf6580fSSatish Balay       ib = idx + ai[i];
1438faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1439faf6580fSSatish Balay         rval      = ib[j]*3;
1440faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1441faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1442faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1443faf6580fSSatish Balay         v += 9;
1444faf6580fSSatish Balay       }
1445faf6580fSSatish Balay     }
1446faf6580fSSatish Balay     break;
1447faf6580fSSatish Balay   case 4:
1448faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1449faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1450faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1451faf6580fSSatish Balay       ib = idx + ai[i];
1452faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1453faf6580fSSatish Balay         rval      = ib[j]*4;
1454faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1455faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1456faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1457faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1458faf6580fSSatish Balay         v += 16;
1459faf6580fSSatish Balay       }
1460faf6580fSSatish Balay     }
1461faf6580fSSatish Balay     break;
1462faf6580fSSatish Balay   case 5:
1463faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1464faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1465faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1466faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1467faf6580fSSatish Balay       ib = idx + ai[i];
1468faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1469faf6580fSSatish Balay         rval      = ib[j]*5;
1470faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1471faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1472faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1473faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1474faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1475faf6580fSSatish Balay         v += 25;
1476faf6580fSSatish Balay       }
1477faf6580fSSatish Balay     }
1478faf6580fSSatish Balay     break;
1479faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1480faf6580fSSatish Balay     default: {
1481faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1482faf6580fSSatish Balay       if (!a->mult_work) {
1483faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1484faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1485faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1486faf6580fSSatish Balay       }
1487faf6580fSSatish Balay       work = a->mult_work;
1488faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1489faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1490faf6580fSSatish Balay         ncols = n*bs;
1491faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1492faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1493faf6580fSSatish Balay         v += n*bs2;
1494faf6580fSSatish Balay         x += bs;
1495faf6580fSSatish Balay         workt = work;
1496faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1497faf6580fSSatish Balay           zb = z + bs*(*idx++);
1498faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1499faf6580fSSatish Balay           workt += bs;
1500faf6580fSSatish Balay         }
1501faf6580fSSatish Balay       }
1502faf6580fSSatish Balay     }
1503faf6580fSSatish Balay   }
1504faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1505faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1506faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
15072593348eSBarry Smith   return 0;
15082593348eSBarry Smith }
15092593348eSBarry Smith 
15105615d1e5SSatish Balay #undef __FUNC__
15115615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ"
15124e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
15132593348eSBarry Smith {
1514b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15154e220ebcSLois Curfman McInnes 
15164e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
15174e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
15184e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
15194e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
15204e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
15214e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
15224e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
15234e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
15244e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
15254e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
15264e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
15274e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
15284e220ebcSLois Curfman McInnes   info->memory       = A->mem;
15294e220ebcSLois Curfman McInnes   if (A->factor) {
15304e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
15314e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
15324e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
15334e220ebcSLois Curfman McInnes   } else {
15344e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
15354e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
15364e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
15374e220ebcSLois Curfman McInnes   }
15382593348eSBarry Smith   return 0;
15392593348eSBarry Smith }
15402593348eSBarry Smith 
15415615d1e5SSatish Balay #undef __FUNC__
15425615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
154391d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
154491d316f6SSatish Balay {
154591d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
154691d316f6SSatish Balay 
1547e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
154891d316f6SSatish Balay 
154991d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
155091d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1551a7c10996SSatish Balay       (a->nz != b->nz)) {
155291d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
155391d316f6SSatish Balay   }
155491d316f6SSatish Balay 
155591d316f6SSatish Balay   /* if the a->i are the same */
155691d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
155791d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
155891d316f6SSatish Balay   }
155991d316f6SSatish Balay 
156091d316f6SSatish Balay   /* if a->j are the same */
156191d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
156291d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
156391d316f6SSatish Balay   }
156491d316f6SSatish Balay 
156591d316f6SSatish Balay   /* if a->a are the same */
156691d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
156791d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
156891d316f6SSatish Balay   }
156991d316f6SSatish Balay   *flg = PETSC_TRUE;
157091d316f6SSatish Balay   return 0;
157191d316f6SSatish Balay 
157291d316f6SSatish Balay }
157391d316f6SSatish Balay 
15745615d1e5SSatish Balay #undef __FUNC__
15755615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
157691d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
157791d316f6SSatish Balay {
157891d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15797e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
158017e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
158117e48fc4SSatish Balay 
15820513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
158317e48fc4SSatish Balay   bs   = a->bs;
158417e48fc4SSatish Balay   aa   = a->a;
158517e48fc4SSatish Balay   ai   = a->i;
158617e48fc4SSatish Balay   aj   = a->j;
158717e48fc4SSatish Balay   ambs = a->mbs;
15889df24120SSatish Balay   bs2  = a->bs2;
158991d316f6SSatish Balay 
159091d316f6SSatish Balay   VecSet(&zero,v);
159190f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1592e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
159317e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
159417e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
159517e48fc4SSatish Balay       if (aj[j] == i) {
159617e48fc4SSatish Balay         row  = i*bs;
15977e67e3f9SSatish Balay         aa_j = aa+j*bs2;
15987e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
159991d316f6SSatish Balay         break;
160091d316f6SSatish Balay       }
160191d316f6SSatish Balay     }
160291d316f6SSatish Balay   }
160391d316f6SSatish Balay   return 0;
160491d316f6SSatish Balay }
160591d316f6SSatish Balay 
16065615d1e5SSatish Balay #undef __FUNC__
16075615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
16089867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
16099867e207SSatish Balay {
16109867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16119867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
16127e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
16139867e207SSatish Balay 
16149867e207SSatish Balay   ai  = a->i;
16159867e207SSatish Balay   aj  = a->j;
16169867e207SSatish Balay   aa  = a->a;
16179867e207SSatish Balay   m   = a->m;
16189867e207SSatish Balay   n   = a->n;
16199867e207SSatish Balay   bs  = a->bs;
16209867e207SSatish Balay   mbs = a->mbs;
16219df24120SSatish Balay   bs2 = a->bs2;
16229867e207SSatish Balay   if (ll) {
162390f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1624e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
16259867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
16269867e207SSatish Balay       M  = ai[i+1] - ai[i];
16279867e207SSatish Balay       li = l + i*bs;
16287e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
16299867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
16307e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
16319867e207SSatish Balay           (*v++) *= li[k%bs];
16329867e207SSatish Balay         }
16339867e207SSatish Balay       }
16349867e207SSatish Balay     }
16359867e207SSatish Balay   }
16369867e207SSatish Balay 
16379867e207SSatish Balay   if (rr) {
163890f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1639e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
16409867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
16419867e207SSatish Balay       M  = ai[i+1] - ai[i];
16427e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
16439867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
16449867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
16459867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
16469867e207SSatish Balay           x = ri[k];
16479867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
16489867e207SSatish Balay         }
16499867e207SSatish Balay       }
16509867e207SSatish Balay     }
16519867e207SSatish Balay   }
16529867e207SSatish Balay   return 0;
16539867e207SSatish Balay }
16549867e207SSatish Balay 
16559867e207SSatish Balay 
1656b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1657b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
16582a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1659736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1660736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
16611a6a6d98SBarry Smith 
16621a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
16631a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
16641a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
16651a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
16661a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
16671a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
16681a6a6d98SBarry Smith 
16697fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
16707fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
16717fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
16727fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
16737fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
16747fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
16752593348eSBarry Smith 
16765615d1e5SSatish Balay #undef __FUNC__
16775615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
1678b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
16792593348eSBarry Smith {
1680b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16812593348eSBarry Smith   Scalar      *v = a->a;
16822593348eSBarry Smith   double      sum = 0.0;
16839df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
16842593348eSBarry Smith 
16852593348eSBarry Smith   if (type == NORM_FROBENIUS) {
16860419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
16872593348eSBarry Smith #if defined(PETSC_COMPLEX)
16882593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
16892593348eSBarry Smith #else
16902593348eSBarry Smith       sum += (*v)*(*v); v++;
16912593348eSBarry Smith #endif
16922593348eSBarry Smith     }
16932593348eSBarry Smith     *norm = sqrt(sum);
16942593348eSBarry Smith   }
16952593348eSBarry Smith   else {
1696e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
16972593348eSBarry Smith   }
16982593348eSBarry Smith   return 0;
16992593348eSBarry Smith }
17002593348eSBarry Smith 
17012593348eSBarry Smith /*
17022593348eSBarry Smith      note: This can only work for identity for row and col. It would
17032593348eSBarry Smith    be good to check this and otherwise generate an error.
17042593348eSBarry Smith */
17055615d1e5SSatish Balay #undef __FUNC__
17065615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
1707b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
17082593348eSBarry Smith {
1709b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
17102593348eSBarry Smith   Mat         outA;
1711de6a44a3SBarry Smith   int         ierr;
17122593348eSBarry Smith 
1713e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
17142593348eSBarry Smith 
17152593348eSBarry Smith   outA          = inA;
17162593348eSBarry Smith   inA->factor   = FACTOR_LU;
17172593348eSBarry Smith   a->row        = row;
17182593348eSBarry Smith   a->col        = col;
17192593348eSBarry Smith 
1720de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
17212593348eSBarry Smith 
17222593348eSBarry Smith   if (!a->diag) {
1723b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
17242593348eSBarry Smith   }
17257fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
17262593348eSBarry Smith   return 0;
17272593348eSBarry Smith }
17282593348eSBarry Smith 
17295615d1e5SSatish Balay #undef __FUNC__
17305615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
1731b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
17322593348eSBarry Smith {
1733b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
17349df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1735b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1736b6490206SBarry Smith   PLogFlops(totalnz);
17372593348eSBarry Smith   return 0;
17382593348eSBarry Smith }
17392593348eSBarry Smith 
17405615d1e5SSatish Balay #undef __FUNC__
17415615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
17427e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
17437e67e3f9SSatish Balay {
17447e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17457e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1746a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
17479df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
17487e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
17497e67e3f9SSatish Balay 
17507e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
17517e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1752e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1753e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1754a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
17557e67e3f9SSatish Balay     nrow = ailen[brow];
17567e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1757e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1758e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1759a7c10996SSatish Balay       col  = in[l] ;
17607e67e3f9SSatish Balay       bcol = col/bs;
17617e67e3f9SSatish Balay       cidx = col%bs;
17627e67e3f9SSatish Balay       ridx = row%bs;
17637e67e3f9SSatish Balay       high = nrow;
17647e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
17657e67e3f9SSatish Balay       while (high-low > 5) {
17667e67e3f9SSatish Balay         t = (low+high)/2;
17677e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
17687e67e3f9SSatish Balay         else             low  = t;
17697e67e3f9SSatish Balay       }
17707e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
17717e67e3f9SSatish Balay         if (rp[i] > bcol) break;
17727e67e3f9SSatish Balay         if (rp[i] == bcol) {
17737e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
17747e67e3f9SSatish Balay           goto finished;
17757e67e3f9SSatish Balay         }
17767e67e3f9SSatish Balay       }
17777e67e3f9SSatish Balay       *v++ = zero;
17787e67e3f9SSatish Balay       finished:;
17797e67e3f9SSatish Balay     }
17807e67e3f9SSatish Balay   }
17817e67e3f9SSatish Balay   return 0;
17827e67e3f9SSatish Balay }
17837e67e3f9SSatish Balay 
17845615d1e5SSatish Balay #undef __FUNC__
17855615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
17865a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
17875a838052SSatish Balay {
17885a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
17895a838052SSatish Balay   *bs = baij->bs;
17905a838052SSatish Balay   return 0;
17915a838052SSatish Balay }
17925a838052SSatish Balay 
1793d9b7c43dSSatish Balay /* idx should be of length atlease bs */
17945615d1e5SSatish Balay #undef __FUNC__
17955615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1796d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1797d9b7c43dSSatish Balay {
1798d9b7c43dSSatish Balay   int i,row;
1799d9b7c43dSSatish Balay   row = idx[0];
1800d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1801d9b7c43dSSatish Balay 
1802d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1803d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1804d9b7c43dSSatish Balay   }
1805d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1806d9b7c43dSSatish Balay   return 0;
1807d9b7c43dSSatish Balay }
1808d9b7c43dSSatish Balay 
18095615d1e5SSatish Balay #undef __FUNC__
18105615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
1811d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1812d9b7c43dSSatish Balay {
1813d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1814d9b7c43dSSatish Balay   IS          is_local;
1815d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1816d9b7c43dSSatish Balay   PetscTruth  flg;
1817d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1818d9b7c43dSSatish Balay 
1819d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1820d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1821d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1822537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1823d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1824d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1825d9b7c43dSSatish Balay 
1826d9b7c43dSSatish Balay   i = 0;
1827d9b7c43dSSatish Balay   while (i < is_n) {
1828e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1829d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1830d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1831d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1832d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1833d9b7c43dSSatish Balay       i += bs;
1834d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1835d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1836d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1837d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1838d9b7c43dSSatish Balay         aa[0] = zero;
1839d9b7c43dSSatish Balay         aa+=bs;
1840d9b7c43dSSatish Balay       }
1841d9b7c43dSSatish Balay       i++;
1842d9b7c43dSSatish Balay     }
1843d9b7c43dSSatish Balay   }
1844d9b7c43dSSatish Balay   if (diag) {
1845d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1846d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1847d9b7c43dSSatish Balay     }
1848d9b7c43dSSatish Balay   }
1849d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1850d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1851d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
18529a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1853d9b7c43dSSatish Balay 
1854d9b7c43dSSatish Balay   return 0;
1855d9b7c43dSSatish Balay }
18561c351548SSatish Balay 
18575615d1e5SSatish Balay #undef __FUNC__
18585615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1859ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1860ba4ca20aSSatish Balay {
1861ba4ca20aSSatish Balay   static int called = 0;
1862ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1863ba4ca20aSSatish Balay 
1864ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1865ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1866ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1867ba4ca20aSSatish Balay   return 0;
1868ba4ca20aSSatish Balay }
1869d9b7c43dSSatish Balay 
18702593348eSBarry Smith /* -------------------------------------------------------------------*/
1871cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
18729867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1873f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1874faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
18751a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1876b6490206SBarry Smith        0,0,
1877de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1878b6490206SBarry Smith        0,
1879f2501298SSatish Balay        MatTranspose_SeqBAIJ,
188017e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
18819867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1882584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1883b6490206SBarry Smith        0,
1884d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
18857fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1886b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1887de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1888d402145bSBarry Smith        0,0,
1889b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1890b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1891af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
18927e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1893ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
18943b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
18953b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
1896*92c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
1897*92c4ed94SBarry Smith        0,0,0,0,0,0,
1898*92c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
18992593348eSBarry Smith 
19005615d1e5SSatish Balay #undef __FUNC__
19015615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
19022593348eSBarry Smith /*@C
190344cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
190444cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
190544cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
19062bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
19072bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
19082593348eSBarry Smith 
19092593348eSBarry Smith    Input Parameters:
19102593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1911b6490206SBarry Smith .  bs - size of block
19122593348eSBarry Smith .  m - number of rows
19132593348eSBarry Smith .  n - number of columns
1914b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
19152bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
19162bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
19172593348eSBarry Smith 
19182593348eSBarry Smith    Output Parameter:
19192593348eSBarry Smith .  A - the matrix
19202593348eSBarry Smith 
19210513a670SBarry Smith    Options Database Keys:
19220513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
19230513a670SBarry Smith $                     block calculations (much solver)
19240513a670SBarry Smith $    -mat_block_size - size of the blocks to use
19250513a670SBarry Smith 
19262593348eSBarry Smith    Notes:
192744cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
19282593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
192944cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
19302593348eSBarry Smith 
19312593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
19322593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
19332593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
19346da5968aSLois Curfman McInnes    matrices.
19352593348eSBarry Smith 
193644cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
19372593348eSBarry Smith @*/
1938b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
19392593348eSBarry Smith {
19402593348eSBarry Smith   Mat         B;
1941b6490206SBarry Smith   Mat_SeqBAIJ *b;
19423b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
19433b2fbd54SBarry Smith 
19443b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
1945e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
1946b6490206SBarry Smith 
19470513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
19480513a670SBarry Smith 
1949f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1950e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
19512593348eSBarry Smith 
19522593348eSBarry Smith   *A = 0;
1953b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
19542593348eSBarry Smith   PLogObjectCreate(B);
1955b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
195644cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
19572593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
19581a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
19591a6a6d98SBarry Smith   if (!flg) {
19607fc0212eSBarry Smith     switch (bs) {
19617fc0212eSBarry Smith       case 1:
19627fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
19631a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
196439b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1965f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
19667fc0212eSBarry Smith        break;
19674eeb42bcSBarry Smith       case 2:
19684eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
19691a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
197039b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1971f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
19726d84be18SBarry Smith         break;
1973f327dd97SBarry Smith       case 3:
1974f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
19751a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
197639b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1977f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
19784eeb42bcSBarry Smith         break;
19796d84be18SBarry Smith       case 4:
19806d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
19811a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
198239b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1983f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
19846d84be18SBarry Smith         break;
19856d84be18SBarry Smith       case 5:
19866d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
19871a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
198839b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1989f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
19906d84be18SBarry Smith         break;
19917fc0212eSBarry Smith     }
19921a6a6d98SBarry Smith   }
1993b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1994b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
19952593348eSBarry Smith   B->factor           = 0;
19962593348eSBarry Smith   B->lupivotthreshold = 1.0;
199790f02eecSBarry Smith   B->mapping          = 0;
19982593348eSBarry Smith   b->row              = 0;
19992593348eSBarry Smith   b->col              = 0;
20002593348eSBarry Smith   b->reallocs         = 0;
20012593348eSBarry Smith 
200244cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
200344cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2004b6490206SBarry Smith   b->mbs     = mbs;
2005f2501298SSatish Balay   b->nbs     = nbs;
2006b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
20072593348eSBarry Smith   if (nnz == PETSC_NULL) {
2008119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
20092593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2010b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2011b6490206SBarry Smith     nz = nz*mbs;
20122593348eSBarry Smith   }
20132593348eSBarry Smith   else {
20142593348eSBarry Smith     nz = 0;
2015b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
20162593348eSBarry Smith   }
20172593348eSBarry Smith 
20182593348eSBarry Smith   /* allocate the matrix space */
20197e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
20202593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
20217e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
20227e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
20232593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
20242593348eSBarry Smith   b->i  = b->j + nz;
20252593348eSBarry Smith   b->singlemalloc = 1;
20262593348eSBarry Smith 
2027de6a44a3SBarry Smith   b->i[0] = 0;
2028b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
20292593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
20302593348eSBarry Smith   }
20312593348eSBarry Smith 
2032b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2033b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2034b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2035b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
20362593348eSBarry Smith 
2037b6490206SBarry Smith   b->bs               = bs;
20389df24120SSatish Balay   b->bs2              = bs2;
2039b6490206SBarry Smith   b->mbs              = mbs;
20402593348eSBarry Smith   b->nz               = 0;
204118e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
20422593348eSBarry Smith   b->sorted           = 0;
20432593348eSBarry Smith   b->roworiented      = 1;
20442593348eSBarry Smith   b->nonew            = 0;
20452593348eSBarry Smith   b->diag             = 0;
20462593348eSBarry Smith   b->solve_work       = 0;
2047de6a44a3SBarry Smith   b->mult_work        = 0;
20482593348eSBarry Smith   b->spptr            = 0;
20494e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
20504e220ebcSLois Curfman McInnes 
20512593348eSBarry Smith   *A = B;
20522593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
20532593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
20542593348eSBarry Smith   return 0;
20552593348eSBarry Smith }
20562593348eSBarry Smith 
20575615d1e5SSatish Balay #undef __FUNC__
20585615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2059b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
20602593348eSBarry Smith {
20612593348eSBarry Smith   Mat         C;
2062b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
20639df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2064de6a44a3SBarry Smith 
2065e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
20662593348eSBarry Smith 
20672593348eSBarry Smith   *B = 0;
2068b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
20692593348eSBarry Smith   PLogObjectCreate(C);
2070b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
20712593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2072b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2073b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
20742593348eSBarry Smith   C->factor     = A->factor;
20752593348eSBarry Smith   c->row        = 0;
20762593348eSBarry Smith   c->col        = 0;
20772593348eSBarry Smith   C->assembled  = PETSC_TRUE;
20782593348eSBarry Smith 
207944cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
208044cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
208144cd7ae7SLois Curfman McInnes   C->M          = a->m;
208244cd7ae7SLois Curfman McInnes   C->N          = a->n;
208344cd7ae7SLois Curfman McInnes 
2084b6490206SBarry Smith   c->bs         = a->bs;
20859df24120SSatish Balay   c->bs2        = a->bs2;
2086b6490206SBarry Smith   c->mbs        = a->mbs;
2087b6490206SBarry Smith   c->nbs        = a->nbs;
20882593348eSBarry Smith 
2089b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2090b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2091b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
20922593348eSBarry Smith     c->imax[i] = a->imax[i];
20932593348eSBarry Smith     c->ilen[i] = a->ilen[i];
20942593348eSBarry Smith   }
20952593348eSBarry Smith 
20962593348eSBarry Smith   /* allocate the matrix space */
20972593348eSBarry Smith   c->singlemalloc = 1;
20987e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
20992593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
21007e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2101de6a44a3SBarry Smith   c->i  = c->j + nz;
2102b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2103b6490206SBarry Smith   if (mbs > 0) {
2104de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
21052593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
21067e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
21072593348eSBarry Smith     }
21082593348eSBarry Smith   }
21092593348eSBarry Smith 
2110b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
21112593348eSBarry Smith   c->sorted      = a->sorted;
21122593348eSBarry Smith   c->roworiented = a->roworiented;
21132593348eSBarry Smith   c->nonew       = a->nonew;
21142593348eSBarry Smith 
21152593348eSBarry Smith   if (a->diag) {
2116b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2117b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2118b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
21192593348eSBarry Smith       c->diag[i] = a->diag[i];
21202593348eSBarry Smith     }
21212593348eSBarry Smith   }
21222593348eSBarry Smith   else c->diag          = 0;
21232593348eSBarry Smith   c->nz                 = a->nz;
21242593348eSBarry Smith   c->maxnz              = a->maxnz;
21252593348eSBarry Smith   c->solve_work         = 0;
21262593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
21277fc0212eSBarry Smith   c->mult_work          = 0;
21282593348eSBarry Smith   *B = C;
21292593348eSBarry Smith   return 0;
21302593348eSBarry Smith }
21312593348eSBarry Smith 
21325615d1e5SSatish Balay #undef __FUNC__
21335615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
213419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
21352593348eSBarry Smith {
2136b6490206SBarry Smith   Mat_SeqBAIJ  *a;
21372593348eSBarry Smith   Mat          B;
2138de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2139b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
214035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2141a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2142b6490206SBarry Smith   Scalar       *aa;
214319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
21442593348eSBarry Smith 
21451a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2146de6a44a3SBarry Smith   bs2  = bs*bs;
2147b6490206SBarry Smith 
21482593348eSBarry Smith   MPI_Comm_size(comm,&size);
2149e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
215090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
215177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2152e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
21532593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
21542593348eSBarry Smith 
2155e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
215635aab85fSBarry Smith 
215735aab85fSBarry Smith   /*
215835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
215935aab85fSBarry Smith     divisible by the blocksize
216035aab85fSBarry Smith   */
2161b6490206SBarry Smith   mbs        = M/bs;
216235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
216335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
216435aab85fSBarry Smith   else                  mbs++;
216535aab85fSBarry Smith   if (extra_rows) {
2166537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
216735aab85fSBarry Smith   }
2168b6490206SBarry Smith 
21692593348eSBarry Smith   /* read in row lengths */
217035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
217177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
217235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
21732593348eSBarry Smith 
2174b6490206SBarry Smith   /* read in column indices */
217535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
217677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
217735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2178b6490206SBarry Smith 
2179b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2180b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2181b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
218235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
218335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
218435aab85fSBarry Smith   masked = mask + mbs;
2185b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2186b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
218735aab85fSBarry Smith     nmask = 0;
2188b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2189b6490206SBarry Smith       kmax = rowlengths[rowcount];
2190b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
219135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
219235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2193b6490206SBarry Smith       }
2194b6490206SBarry Smith       rowcount++;
2195b6490206SBarry Smith     }
219635aab85fSBarry Smith     browlengths[i] += nmask;
219735aab85fSBarry Smith     /* zero out the mask elements we set */
219835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2199b6490206SBarry Smith   }
2200b6490206SBarry Smith 
22012593348eSBarry Smith   /* create our matrix */
220235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
220335aab85fSBarry Smith          CHKERRQ(ierr);
22042593348eSBarry Smith   B = *A;
2205b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
22062593348eSBarry Smith 
22072593348eSBarry Smith   /* set matrix "i" values */
2208de6a44a3SBarry Smith   a->i[0] = 0;
2209b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2210b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2211b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
22122593348eSBarry Smith   }
2213b6490206SBarry Smith   a->nz         = 0;
2214b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
22152593348eSBarry Smith 
2216b6490206SBarry Smith   /* read in nonzero values */
221735aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
221877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
221935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2220b6490206SBarry Smith 
2221b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2222b6490206SBarry Smith   nzcount = 0; jcount = 0;
2223b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2224b6490206SBarry Smith     nzcountb = nzcount;
222535aab85fSBarry Smith     nmask    = 0;
2226b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2227b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2228b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
222935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
223035aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2231b6490206SBarry Smith       }
2232b6490206SBarry Smith       rowcount++;
2233b6490206SBarry Smith     }
2234de6a44a3SBarry Smith     /* sort the masked values */
223577c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2236de6a44a3SBarry Smith 
2237b6490206SBarry Smith     /* set "j" values into matrix */
2238b6490206SBarry Smith     maskcount = 1;
223935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
224035aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2241de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2242b6490206SBarry Smith     }
2243b6490206SBarry Smith     /* set "a" values into matrix */
2244de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2245b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2246b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2247b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2248de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2249de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2250de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2251de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2252b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2253b6490206SBarry Smith       }
2254b6490206SBarry Smith     }
225535aab85fSBarry Smith     /* zero out the mask elements we set */
225635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2257b6490206SBarry Smith   }
2258e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2259b6490206SBarry Smith 
2260b6490206SBarry Smith   PetscFree(rowlengths);
2261b6490206SBarry Smith   PetscFree(browlengths);
2262b6490206SBarry Smith   PetscFree(aa);
2263b6490206SBarry Smith   PetscFree(jj);
2264b6490206SBarry Smith   PetscFree(mask);
2265b6490206SBarry Smith 
2266b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2267de6a44a3SBarry Smith 
2268de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2269de6a44a3SBarry Smith   if (flg) {
227019bcc07fSBarry Smith     Viewer tviewer;
227119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2272639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
227319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
227419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2275de6a44a3SBarry Smith   }
2276de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2277de6a44a3SBarry Smith   if (flg) {
227819bcc07fSBarry Smith     Viewer tviewer;
227919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2280639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
228119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
228219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2283de6a44a3SBarry Smith   }
2284de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2285de6a44a3SBarry Smith   if (flg) {
228619bcc07fSBarry Smith     Viewer tviewer;
228719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
228819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
228919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2290de6a44a3SBarry Smith   }
2291de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2292de6a44a3SBarry Smith   if (flg) {
229319bcc07fSBarry Smith     Viewer tviewer;
229419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2295639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
229619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
229719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2298de6a44a3SBarry Smith   }
2299de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2300de6a44a3SBarry Smith   if (flg) {
230119bcc07fSBarry Smith     Viewer tviewer;
230219bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
230319bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
230419bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
230519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2306de6a44a3SBarry Smith   }
23072593348eSBarry Smith   return 0;
23082593348eSBarry Smith }
23092593348eSBarry Smith 
23102593348eSBarry Smith 
23112593348eSBarry Smith 
2312