xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8a84c255a1281c021245f30e265b307d850f26a3)
12593348eSBarry Smith #ifndef lint
2*8a84c255SSatish Balay static char vcid[] = "$Id: baij.c,v 1.88 1997/02/04 01:21:20 balay Exp balay $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
195615d1e5SSatish Balay #undef __FUNC__
205615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
22de6a44a3SBarry Smith {
23de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
247fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
25de6a44a3SBarry Smith 
26de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
287fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
29de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30de6a44a3SBarry Smith       if (a->j[j] == i) {
31de6a44a3SBarry Smith         diag[i] = j;
32de6a44a3SBarry Smith         break;
33de6a44a3SBarry Smith       }
34de6a44a3SBarry Smith     }
35de6a44a3SBarry Smith   }
36de6a44a3SBarry Smith   a->diag = diag;
37de6a44a3SBarry Smith   return 0;
38de6a44a3SBarry Smith }
392593348eSBarry Smith 
402593348eSBarry Smith #include "draw.h"
412593348eSBarry Smith #include "pinclude/pviewer.h"
4277c4ece6SBarry Smith #include "sys.h"
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
1197e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
1347e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
2175615d1e5SSatish Balay #undef __FUNC__
2185615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw"
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
3445615d1e5SSatish Balay #undef __FUNC__
3455615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ"
346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
3705615d1e5SSatish Balay #undef __FUNC__
3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417cd0e1443SSatish Balay           goto noinsert;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420cd0e1443SSatish Balay       if (nonew) goto noinsert;
421cd0e1443SSatish Balay       if (nrow >= rmax) {
422cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
423cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424cd0e1443SSatish Balay         Scalar *new_a;
425cd0e1443SSatish Balay 
426cd0e1443SSatish Balay         /* malloc new storage space */
4277e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4297e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
430cd0e1443SSatish Balay         new_i   = new_j + new_nz;
431cd0e1443SSatish Balay 
432cd0e1443SSatish Balay         /* copy over old data into new slots */
433cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4347e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438cd0e1443SSatish Balay                                                            len*sizeof(int));
439a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443cd0e1443SSatish Balay         /* free up old matrix storage */
444cd0e1443SSatish Balay         PetscFree(a->a);
445cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447cd0e1443SSatish Balay         a->singlemalloc = 1;
448cd0e1443SSatish Balay 
449a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4517e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
453cd0e1443SSatish Balay         a->reallocs++;
454119a934aSSatish Balay         a->nz++;
455cd0e1443SSatish Balay       }
4567e67e3f9SSatish Balay       N = nrow++ - 1;
457cd0e1443SSatish Balay       /* shift up all the later entries in this row */
458cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
459cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4607e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461cd0e1443SSatish Balay       }
4627e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463cd0e1443SSatish Balay       rp[i]                      = bcol;
4647e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
465cd0e1443SSatish Balay       noinsert:;
466cd0e1443SSatish Balay       low = i;
467cd0e1443SSatish Balay     }
468cd0e1443SSatish Balay     ailen[brow] = nrow;
469cd0e1443SSatish Balay   }
470cd0e1443SSatish Balay   return 0;
471cd0e1443SSatish Balay }
472cd0e1443SSatish Balay 
4735615d1e5SSatish Balay #undef __FUNC__
47492c4ed94SBarry Smith #define __FUNC__ "MatSetValues_SeqBAIJ"
47592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
47692c4ed94SBarry Smith {
47792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
478*8a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
47992c4ed94SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
480*8a84c255SSatish Balay   int         *aj=a->j,nonew=a->nonew;
481*8a84c255SSatish Balay   int          bs2=a->bs2,bs=a->bs;
482*8a84c255SSatish Balay   Scalar      *ap,*value,*aa=a->a,*bap;
48392c4ed94SBarry Smith 
48492c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
48592c4ed94SBarry Smith     row  = im[k];
48692c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
48792c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
488*8a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
48992c4ed94SBarry Smith #endif
49092c4ed94SBarry Smith     rp   = aj + ai[row];
49192c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
49292c4ed94SBarry Smith     rmax = imax[row];
49392c4ed94SBarry Smith     nrow = ailen[row];
49492c4ed94SBarry Smith     low  = 0;
49592c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
49692c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
49792c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
498*8a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
49992c4ed94SBarry Smith #endif
50092c4ed94SBarry Smith       col = in[l];
50192c4ed94SBarry Smith       if (roworiented) {
502*8a84c255SSatish Balay         value = v; v+=bs2;
50392c4ed94SBarry Smith       }
50492c4ed94SBarry Smith       else {
505*8a84c255SSatish Balay         value = v+(k + l*m)*bs2;
50692c4ed94SBarry Smith       }
50792c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
50892c4ed94SBarry Smith       while (high-low > 7) {
50992c4ed94SBarry Smith         t = (low+high)/2;
51092c4ed94SBarry Smith         if (rp[t] > col) high = t;
51192c4ed94SBarry Smith         else             low  = t;
51292c4ed94SBarry Smith       }
51392c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
51492c4ed94SBarry Smith         if (rp[i] > col) break;
51592c4ed94SBarry Smith         if (rp[i] == col) {
516*8a84c255SSatish Balay           bap  = ap +  bs2*i;
517*8a84c255SSatish Balay           if (is == ADD_VALUES) {
518*8a84c255SSatish Balay             for ( ii=0; ii<bs; ii++)
519*8a84c255SSatish Balay               for (jj=ii; jj<bs2; jj+=bs )
520*8a84c255SSatish Balay                 bap[jj] += *value++;
521*8a84c255SSatish Balay           }
522*8a84c255SSatish Balay           else {
523*8a84c255SSatish Balay             for ( ii=0; ii<bs; ii++)
524*8a84c255SSatish Balay               for (jj=ii; jj<bs2; jj+=bs )
525*8a84c255SSatish Balay                 bap[jj] += *value++;
526*8a84c255SSatish Balay           }
52792c4ed94SBarry Smith           goto noinsert;
52892c4ed94SBarry Smith         }
52992c4ed94SBarry Smith       }
53092c4ed94SBarry Smith       if (nonew) goto noinsert;
53192c4ed94SBarry Smith       if (nrow >= rmax) {
53292c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
53392c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
53492c4ed94SBarry Smith         Scalar *new_a;
53592c4ed94SBarry Smith 
53692c4ed94SBarry Smith         /* malloc new storage space */
53792c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
53892c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
53992c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
54092c4ed94SBarry Smith         new_i   = new_j + new_nz;
54192c4ed94SBarry Smith 
54292c4ed94SBarry Smith         /* copy over old data into new slots */
54392c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
54492c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
54592c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
54692c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
54792c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
54892c4ed94SBarry Smith                                                            len*sizeof(int));
54992c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
55092c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
55192c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
55292c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
55392c4ed94SBarry Smith         /* free up old matrix storage */
55492c4ed94SBarry Smith         PetscFree(a->a);
55592c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
55692c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
55792c4ed94SBarry Smith         a->singlemalloc = 1;
55892c4ed94SBarry Smith 
55992c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
56092c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
56192c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
56292c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
56392c4ed94SBarry Smith         a->reallocs++;
56492c4ed94SBarry Smith         a->nz++;
56592c4ed94SBarry Smith       }
56692c4ed94SBarry Smith       N = nrow++ - 1;
56792c4ed94SBarry Smith       /* shift up all the later entries in this row */
56892c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
56992c4ed94SBarry Smith         rp[ii+1] = rp[ii];
57092c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
57192c4ed94SBarry Smith       }
57292c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
57392c4ed94SBarry Smith       rp[i] = col;
574*8a84c255SSatish Balay       bap   = ap +  bs2*i;
575*8a84c255SSatish Balay       for ( ii=0; ii<bs; ii++)
576*8a84c255SSatish Balay         for (jj=ii; jj<bs2; jj+=bs )
577*8a84c255SSatish Balay           bap[jj] += *value++;
57892c4ed94SBarry Smith       noinsert:;
57992c4ed94SBarry Smith       low = i;
58092c4ed94SBarry Smith     }
58192c4ed94SBarry Smith     ailen[row] = nrow;
58292c4ed94SBarry Smith   }
58392c4ed94SBarry Smith   return 0;
58492c4ed94SBarry Smith }
58592c4ed94SBarry Smith 
58692c4ed94SBarry Smith #undef __FUNC__
5875615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ"
5880b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
5890b824a48SSatish Balay {
5900b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5910b824a48SSatish Balay   *m = a->m; *n = a->n;
5920b824a48SSatish Balay   return 0;
5930b824a48SSatish Balay }
5940b824a48SSatish Balay 
5955615d1e5SSatish Balay #undef __FUNC__
5965615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
5970b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
5980b824a48SSatish Balay {
5990b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6000b824a48SSatish Balay   *m = 0; *n = a->m;
6010b824a48SSatish Balay   return 0;
6020b824a48SSatish Balay }
6030b824a48SSatish Balay 
6045615d1e5SSatish Balay #undef __FUNC__
6055615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6069867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6079867e207SSatish Balay {
6089867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6097e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6109867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6119867e207SSatish Balay 
6129867e207SSatish Balay   bs  = a->bs;
6139867e207SSatish Balay   ai  = a->i;
6149867e207SSatish Balay   aj  = a->j;
6159867e207SSatish Balay   aa  = a->a;
6169df24120SSatish Balay   bs2 = a->bs2;
6179867e207SSatish Balay 
618e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6199867e207SSatish Balay 
6209867e207SSatish Balay   bn  = row/bs;   /* Block number */
6219867e207SSatish Balay   bp  = row % bs; /* Block Position */
6229867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6239867e207SSatish Balay   *nz = bs*M;
6249867e207SSatish Balay 
6259867e207SSatish Balay   if (v) {
6269867e207SSatish Balay     *v = 0;
6279867e207SSatish Balay     if (*nz) {
6289867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6299867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6309867e207SSatish Balay         v_i  = *v + i*bs;
6317e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6327e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6339867e207SSatish Balay       }
6349867e207SSatish Balay     }
6359867e207SSatish Balay   }
6369867e207SSatish Balay 
6379867e207SSatish Balay   if (idx) {
6389867e207SSatish Balay     *idx = 0;
6399867e207SSatish Balay     if (*nz) {
6409867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6419867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6429867e207SSatish Balay         idx_i = *idx + i*bs;
6439867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6449867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6459867e207SSatish Balay       }
6469867e207SSatish Balay     }
6479867e207SSatish Balay   }
6489867e207SSatish Balay   return 0;
6499867e207SSatish Balay }
6509867e207SSatish Balay 
6515615d1e5SSatish Balay #undef __FUNC__
6525615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6539867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6549867e207SSatish Balay {
6559867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6569867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6579867e207SSatish Balay   return 0;
6589867e207SSatish Balay }
659b6490206SBarry Smith 
6605615d1e5SSatish Balay #undef __FUNC__
6615615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
662f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
663f2501298SSatish Balay {
664f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
665f2501298SSatish Balay   Mat         C;
666f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
6679df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
668f2501298SSatish Balay   Scalar      *array=a->a;
669f2501298SSatish Balay 
670f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
671e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
672f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
673f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
674a7c10996SSatish Balay 
675a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
676f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
677f2501298SSatish Balay   PetscFree(col);
678f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
679f2501298SSatish Balay   cols = rows + bs;
680f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
681f2501298SSatish Balay     cols[0] = i*bs;
682f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
683f2501298SSatish Balay     len = ai[i+1] - ai[i];
684f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
685f2501298SSatish Balay       rows[0] = (*aj++)*bs;
686f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
687f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
688f2501298SSatish Balay       array += bs2;
689f2501298SSatish Balay     }
690f2501298SSatish Balay   }
6911073c447SSatish Balay   PetscFree(rows);
692f2501298SSatish Balay 
6936d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6946d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
695f2501298SSatish Balay 
696f2501298SSatish Balay   if (B != PETSC_NULL) {
697f2501298SSatish Balay     *B = C;
698f2501298SSatish Balay   } else {
699f2501298SSatish Balay     /* This isn't really an in-place transpose */
700f2501298SSatish Balay     PetscFree(a->a);
701f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
702f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
703f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
704f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
705f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
706f2501298SSatish Balay     PetscFree(a);
707f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
708f2501298SSatish Balay     PetscHeaderDestroy(C);
709f2501298SSatish Balay   }
710f2501298SSatish Balay   return 0;
711f2501298SSatish Balay }
712f2501298SSatish Balay 
713f2501298SSatish Balay 
7145615d1e5SSatish Balay #undef __FUNC__
7155615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
716584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
717584200bdSSatish Balay {
718584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
719584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
720a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
721d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
722584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
723584200bdSSatish Balay 
7246d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
725584200bdSSatish Balay 
726d402145bSBarry Smith   rmax = ailen[0];
727584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
728584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
729584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
730d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
731584200bdSSatish Balay     if (fshift) {
732a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
733584200bdSSatish Balay       N = ailen[i];
734584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
735584200bdSSatish Balay         ip[j-fshift] = ip[j];
7367e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
737584200bdSSatish Balay       }
738584200bdSSatish Balay     }
739584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
740584200bdSSatish Balay   }
741584200bdSSatish Balay   if (mbs) {
742584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
743584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
744584200bdSSatish Balay   }
745584200bdSSatish Balay   /* reset ilen and imax for each row */
746584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
747584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
748584200bdSSatish Balay   }
749a7c10996SSatish Balay   a->nz = ai[mbs];
750584200bdSSatish Balay 
751584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
752584200bdSSatish Balay   if (fshift && a->diag) {
753584200bdSSatish Balay     PetscFree(a->diag);
754584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
755584200bdSSatish Balay     a->diag = 0;
756584200bdSSatish Balay   }
7573d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
758219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7593d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
760584200bdSSatish Balay            a->reallocs);
761d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
762e2f3b5e9SSatish Balay   a->reallocs          = 0;
7634e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7644e220ebcSLois Curfman McInnes 
765584200bdSSatish Balay   return 0;
766584200bdSSatish Balay }
767584200bdSSatish Balay 
7685615d1e5SSatish Balay #undef __FUNC__
7695615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
770b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
7712593348eSBarry Smith {
772b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7739df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
7742593348eSBarry Smith   return 0;
7752593348eSBarry Smith }
7762593348eSBarry Smith 
7775615d1e5SSatish Balay #undef __FUNC__
7785615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ"
779b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
7802593348eSBarry Smith {
7812593348eSBarry Smith   Mat         A  = (Mat) obj;
782b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
78390f02eecSBarry Smith   int         ierr;
7842593348eSBarry Smith 
7852593348eSBarry Smith #if defined(PETSC_LOG)
7862593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
7872593348eSBarry Smith #endif
7882593348eSBarry Smith   PetscFree(a->a);
7892593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
7902593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
7912593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
7922593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
7932593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
794de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
7952593348eSBarry Smith   PetscFree(a);
79690f02eecSBarry Smith   if (A->mapping) {
79790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
79890f02eecSBarry Smith   }
799f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
800f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8012593348eSBarry Smith   return 0;
8022593348eSBarry Smith }
8032593348eSBarry Smith 
8045615d1e5SSatish Balay #undef __FUNC__
8055615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ"
806b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8072593348eSBarry Smith {
808b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8096d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
8106d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
8116d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
812219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
8136d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
8146d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
8156d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
816219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8176d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8186d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
81990f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
82090f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
82194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8226d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
823e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8242593348eSBarry Smith   else
825e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8262593348eSBarry Smith   return 0;
8272593348eSBarry Smith }
8282593348eSBarry Smith 
8292593348eSBarry Smith 
8302593348eSBarry Smith /* -------------------------------------------------------*/
8312593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8322593348eSBarry Smith /* -------------------------------------------------------*/
833b6490206SBarry Smith #include "pinclude/plapack.h"
834b6490206SBarry Smith 
8355615d1e5SSatish Balay #undef __FUNC__
8365615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
83739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8382593348eSBarry Smith {
839b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
84039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
841c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8422593348eSBarry Smith 
843c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
844c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
845b6490206SBarry Smith 
846119a934aSSatish Balay   idx   = a->j;
847119a934aSSatish Balay   v     = a->a;
848119a934aSSatish Balay   ii    = a->i;
849119a934aSSatish Balay 
850119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
851119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
852119a934aSSatish Balay     sum  = 0.0;
853119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8541a6a6d98SBarry Smith     z[i] = sum;
855119a934aSSatish Balay   }
856c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
857c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
85839b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
85939b95ed1SBarry Smith   return 0;
86039b95ed1SBarry Smith }
86139b95ed1SBarry Smith 
8625615d1e5SSatish Balay #undef __FUNC__
8635615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
86439b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
86539b95ed1SBarry Smith {
86639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
86739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
86839b95ed1SBarry Smith   register Scalar x1,x2;
86939b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
870c16cb8f2SBarry Smith   int             j,n;
87139b95ed1SBarry Smith 
872c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
873c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
87439b95ed1SBarry Smith 
87539b95ed1SBarry Smith   idx   = a->j;
87639b95ed1SBarry Smith   v     = a->a;
87739b95ed1SBarry Smith   ii    = a->i;
87839b95ed1SBarry Smith 
879119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
880119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
881119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
882119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
883119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
884119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
885119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
886119a934aSSatish Balay       v += 4;
887119a934aSSatish Balay     }
8881a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
889119a934aSSatish Balay     z += 2;
890119a934aSSatish Balay   }
891c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
892c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
89339b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
89439b95ed1SBarry Smith   return 0;
89539b95ed1SBarry Smith }
89639b95ed1SBarry Smith 
8975615d1e5SSatish Balay #undef __FUNC__
8985615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
89939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
90039b95ed1SBarry Smith {
90139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
90239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
903c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
90439b95ed1SBarry Smith 
905c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
906c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
90739b95ed1SBarry Smith 
90839b95ed1SBarry Smith   idx   = a->j;
90939b95ed1SBarry Smith   v     = a->a;
91039b95ed1SBarry Smith   ii    = a->i;
91139b95ed1SBarry Smith 
912119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
913119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
914119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
915119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
916119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
917119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
918119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
919119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
920119a934aSSatish Balay       v += 9;
921119a934aSSatish Balay     }
9221a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
923119a934aSSatish Balay     z += 3;
924119a934aSSatish Balay   }
925c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
926c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
92739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
92839b95ed1SBarry Smith   return 0;
92939b95ed1SBarry Smith }
93039b95ed1SBarry Smith 
9315615d1e5SSatish Balay #undef __FUNC__
9325615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
93339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
93439b95ed1SBarry Smith {
93539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
93639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
93739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
93839b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
939c16cb8f2SBarry Smith   int             j,n;
94039b95ed1SBarry Smith 
941c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
942c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
94339b95ed1SBarry Smith 
94439b95ed1SBarry Smith   idx   = a->j;
94539b95ed1SBarry Smith   v     = a->a;
94639b95ed1SBarry Smith   ii    = a->i;
94739b95ed1SBarry Smith 
948119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
949119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
950119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
951119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
952119a934aSSatish Balay       xb = x + 4*(*idx++);
953119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
954119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
955119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
956119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
957119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
958119a934aSSatish Balay       v += 16;
959119a934aSSatish Balay     }
9601a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
961119a934aSSatish Balay     z += 4;
962119a934aSSatish Balay   }
963c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
964c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
96539b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
96639b95ed1SBarry Smith   return 0;
96739b95ed1SBarry Smith }
96839b95ed1SBarry Smith 
9695615d1e5SSatish Balay #undef __FUNC__
9705615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
97139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
97239b95ed1SBarry Smith {
97339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
97439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
97539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
976c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
97739b95ed1SBarry Smith 
978c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
979c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
98039b95ed1SBarry Smith 
98139b95ed1SBarry Smith   idx   = a->j;
98239b95ed1SBarry Smith   v     = a->a;
98339b95ed1SBarry Smith   ii    = a->i;
98439b95ed1SBarry Smith 
985119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
986119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
987119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
988119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
989119a934aSSatish Balay       xb = x + 5*(*idx++);
990119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
991119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
992119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
993119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
994119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
995119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
996119a934aSSatish Balay       v += 25;
997119a934aSSatish Balay     }
9981a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
999119a934aSSatish Balay     z += 5;
1000119a934aSSatish Balay   }
1001c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1002c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
100339b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
100439b95ed1SBarry Smith   return 0;
100539b95ed1SBarry Smith }
100639b95ed1SBarry Smith 
10075615d1e5SSatish Balay #undef __FUNC__
100848e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
100948e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
101048e9ddb2SSatish Balay {
101148e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
101248e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
101348e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
101448e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
101548e9ddb2SSatish Balay 
101648e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
101748e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
101848e9ddb2SSatish Balay 
101948e9ddb2SSatish Balay   idx   = a->j;
102048e9ddb2SSatish Balay   v     = a->a;
102148e9ddb2SSatish Balay   ii    = a->i;
102248e9ddb2SSatish Balay 
102348e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
102448e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
102548e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
102648e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
102748e9ddb2SSatish Balay       xb = x + 7*(*idx++);
102848e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
102948e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
103048e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
103148e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
103248e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
103348e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
103448e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
103548e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
103648e9ddb2SSatish Balay       v += 49;
103748e9ddb2SSatish Balay     }
103848e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
103948e9ddb2SSatish Balay     z += 7;
104048e9ddb2SSatish Balay   }
104148e9ddb2SSatish Balay 
104248e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
104348e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
104448e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
104548e9ddb2SSatish Balay   return 0;
104648e9ddb2SSatish Balay }
104748e9ddb2SSatish Balay 
104848e9ddb2SSatish Balay #undef __FUNC__
10495615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
105039b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
105139b95ed1SBarry Smith {
105239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
105339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1054c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1055c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1056c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
105739b95ed1SBarry Smith 
1058c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1059c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
106039b95ed1SBarry Smith 
106139b95ed1SBarry Smith   idx   = a->j;
106239b95ed1SBarry Smith   v     = a->a;
106339b95ed1SBarry Smith   ii    = a->i;
106439b95ed1SBarry Smith 
106539b95ed1SBarry Smith 
1066119a934aSSatish Balay   if (!a->mult_work) {
10673b547af2SSatish Balay     k = PetscMax(a->m,a->n);
10682b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1069119a934aSSatish Balay   }
1070119a934aSSatish Balay   work = a->mult_work;
1071119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1072119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1073119a934aSSatish Balay     ncols = n*bs;
1074119a934aSSatish Balay     workt = work;
1075119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1076119a934aSSatish Balay       xb = x + bs*(*idx++);
1077119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1078119a934aSSatish Balay       workt += bs;
1079119a934aSSatish Balay     }
10801a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1081119a934aSSatish Balay     v += n*bs2;
1082119a934aSSatish Balay     z += bs;
1083119a934aSSatish Balay   }
1084c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1085c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
10861a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1087bb42667fSSatish Balay   return 0;
1088bb42667fSSatish Balay }
1089bb42667fSSatish Balay 
10905615d1e5SSatish Balay #undef __FUNC__
10915615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1092f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1093f44d3a6dSSatish Balay {
1094f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1095f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1096c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1097f44d3a6dSSatish Balay 
1098c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1099c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1100c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1101f44d3a6dSSatish Balay 
1102f44d3a6dSSatish Balay   idx   = a->j;
1103f44d3a6dSSatish Balay   v     = a->a;
1104f44d3a6dSSatish Balay   ii    = a->i;
1105f44d3a6dSSatish Balay 
1106f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1107f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1108f44d3a6dSSatish Balay     sum  = y[i];
1109f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1110f44d3a6dSSatish Balay     z[i] = sum;
1111f44d3a6dSSatish Balay   }
1112c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1113c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1114c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1115f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1116f44d3a6dSSatish Balay   return 0;
1117f44d3a6dSSatish Balay }
1118f44d3a6dSSatish Balay 
11195615d1e5SSatish Balay #undef __FUNC__
11205615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1121f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1122f44d3a6dSSatish Balay {
1123f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1124f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1125f44d3a6dSSatish Balay   register Scalar x1,x2;
1126f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1127c16cb8f2SBarry Smith   int             j,n;
1128f44d3a6dSSatish Balay 
1129c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1130c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1131c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1132f44d3a6dSSatish Balay 
1133f44d3a6dSSatish Balay   idx   = a->j;
1134f44d3a6dSSatish Balay   v     = a->a;
1135f44d3a6dSSatish Balay   ii    = a->i;
1136f44d3a6dSSatish Balay 
1137f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1138f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1139f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1140f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1141f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1142f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1143f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1144f44d3a6dSSatish Balay       v += 4;
1145f44d3a6dSSatish Balay     }
1146f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1147f44d3a6dSSatish Balay     z += 2; y += 2;
1148f44d3a6dSSatish Balay   }
1149c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1150c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1151c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1152f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1153f44d3a6dSSatish Balay   return 0;
1154f44d3a6dSSatish Balay }
1155f44d3a6dSSatish Balay 
11565615d1e5SSatish Balay #undef __FUNC__
11575615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1158f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1159f44d3a6dSSatish Balay {
1160f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1161f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1162c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1163f44d3a6dSSatish Balay 
1164c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1165c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1166c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1167f44d3a6dSSatish Balay 
1168f44d3a6dSSatish Balay   idx   = a->j;
1169f44d3a6dSSatish Balay   v     = a->a;
1170f44d3a6dSSatish Balay   ii    = a->i;
1171f44d3a6dSSatish Balay 
1172f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1173f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1174f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1175f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1176f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1177f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1178f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1179f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1180f44d3a6dSSatish Balay       v += 9;
1181f44d3a6dSSatish Balay     }
1182f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1183f44d3a6dSSatish Balay     z += 3; y += 3;
1184f44d3a6dSSatish Balay   }
1185c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1186c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1187c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1188f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1189f44d3a6dSSatish Balay   return 0;
1190f44d3a6dSSatish Balay }
1191f44d3a6dSSatish Balay 
11925615d1e5SSatish Balay #undef __FUNC__
11935615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1194f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1195f44d3a6dSSatish Balay {
1196f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1197f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1198f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1199f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1200c16cb8f2SBarry Smith   int             j,n;
1201f44d3a6dSSatish Balay 
1202c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1203c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1204c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1205f44d3a6dSSatish Balay 
1206f44d3a6dSSatish Balay   idx   = a->j;
1207f44d3a6dSSatish Balay   v     = a->a;
1208f44d3a6dSSatish Balay   ii    = a->i;
1209f44d3a6dSSatish Balay 
1210f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1211f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1212f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1213f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1214f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1215f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1216f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1217f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1218f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1219f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1220f44d3a6dSSatish Balay       v += 16;
1221f44d3a6dSSatish Balay     }
1222f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1223f44d3a6dSSatish Balay     z += 4; y += 4;
1224f44d3a6dSSatish Balay   }
1225c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1226c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1227c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1228f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1229f44d3a6dSSatish Balay   return 0;
1230f44d3a6dSSatish Balay }
1231f44d3a6dSSatish Balay 
12325615d1e5SSatish Balay #undef __FUNC__
12335615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1234f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1235f44d3a6dSSatish Balay {
1236f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1237f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1238f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1239c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1240f44d3a6dSSatish Balay 
1241c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1242c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1243c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1244f44d3a6dSSatish Balay 
1245f44d3a6dSSatish Balay   idx   = a->j;
1246f44d3a6dSSatish Balay   v     = a->a;
1247f44d3a6dSSatish Balay   ii    = a->i;
1248f44d3a6dSSatish Balay 
1249f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1250f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1251f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1252f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1253f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1254f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1255f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1256f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1257f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1258f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1259f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1260f44d3a6dSSatish Balay       v += 25;
1261f44d3a6dSSatish Balay     }
1262f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1263f44d3a6dSSatish Balay     z += 5; y += 5;
1264f44d3a6dSSatish Balay   }
1265c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1266c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1267c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1268f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1269f44d3a6dSSatish Balay   return 0;
1270f44d3a6dSSatish Balay }
1271f44d3a6dSSatish Balay 
12725615d1e5SSatish Balay #undef __FUNC__
127348e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
127448e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
127548e9ddb2SSatish Balay {
127648e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
127748e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
127848e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
127948e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
128048e9ddb2SSatish Balay 
128148e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
128248e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
128348e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
128448e9ddb2SSatish Balay 
128548e9ddb2SSatish Balay   idx   = a->j;
128648e9ddb2SSatish Balay   v     = a->a;
128748e9ddb2SSatish Balay   ii    = a->i;
128848e9ddb2SSatish Balay 
128948e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
129048e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
129148e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
129248e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
129348e9ddb2SSatish Balay       xb = x + 7*(*idx++);
129448e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
129548e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
129648e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
129748e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
129848e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
129948e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
130048e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
130148e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
130248e9ddb2SSatish Balay       v += 49;
130348e9ddb2SSatish Balay     }
130448e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
130548e9ddb2SSatish Balay     z += 7; y += 7;
130648e9ddb2SSatish Balay   }
130748e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
130848e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
130948e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
131048e9ddb2SSatish Balay   PLogFlops(98*a->nz);
131148e9ddb2SSatish Balay   return 0;
131248e9ddb2SSatish Balay }
131348e9ddb2SSatish Balay 
131448e9ddb2SSatish Balay 
131548e9ddb2SSatish Balay #undef __FUNC__
13165615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1317f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1318f44d3a6dSSatish Balay {
1319f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1320f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1321f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1322f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1323f44d3a6dSSatish Balay 
1324f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1325f44d3a6dSSatish Balay 
1326c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1327c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1328f44d3a6dSSatish Balay 
1329f44d3a6dSSatish Balay   idx   = a->j;
1330f44d3a6dSSatish Balay   v     = a->a;
1331f44d3a6dSSatish Balay   ii    = a->i;
1332f44d3a6dSSatish Balay 
1333f44d3a6dSSatish Balay 
1334f44d3a6dSSatish Balay   if (!a->mult_work) {
1335f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1336f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1337f44d3a6dSSatish Balay   }
1338f44d3a6dSSatish Balay   work = a->mult_work;
1339f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1340f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1341f44d3a6dSSatish Balay     ncols = n*bs;
1342f44d3a6dSSatish Balay     workt = work;
1343f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1344f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1345f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1346f44d3a6dSSatish Balay       workt += bs;
1347f44d3a6dSSatish Balay     }
1348f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1349f44d3a6dSSatish Balay     v += n*bs2;
1350f44d3a6dSSatish Balay     z += bs;
1351f44d3a6dSSatish Balay   }
1352c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1353c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1354f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1355f44d3a6dSSatish Balay   return 0;
1356f44d3a6dSSatish Balay }
1357f44d3a6dSSatish Balay 
13585615d1e5SSatish Balay #undef __FUNC__
13595615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
13601a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1361bb42667fSSatish Balay {
1362bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13631a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1364bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1365bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
13669df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1367119a934aSSatish Balay 
1368119a934aSSatish Balay 
136990f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
137090f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1371bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1372bb42667fSSatish Balay 
1373119a934aSSatish Balay   idx   = a->j;
1374119a934aSSatish Balay   v     = a->a;
1375119a934aSSatish Balay   ii    = a->i;
1376119a934aSSatish Balay 
1377119a934aSSatish Balay   switch (bs) {
1378119a934aSSatish Balay   case 1:
1379119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1380119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1381119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1382119a934aSSatish Balay       ib = idx + ai[i];
1383119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1384bb1453f0SSatish Balay         rval    = ib[j];
1385bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1386119a934aSSatish Balay       }
1387119a934aSSatish Balay     }
1388119a934aSSatish Balay     break;
1389119a934aSSatish Balay   case 2:
1390119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1391119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1392119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1393119a934aSSatish Balay       ib = idx + ai[i];
1394119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1395119a934aSSatish Balay         rval      = ib[j]*2;
1396bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1397bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1398119a934aSSatish Balay         v += 4;
1399119a934aSSatish Balay       }
1400119a934aSSatish Balay     }
1401119a934aSSatish Balay     break;
1402119a934aSSatish Balay   case 3:
1403119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1404119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1405119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1406119a934aSSatish Balay       ib = idx + ai[i];
1407119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1408119a934aSSatish Balay         rval      = ib[j]*3;
1409bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1410bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1411bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1412119a934aSSatish Balay         v += 9;
1413119a934aSSatish Balay       }
1414119a934aSSatish Balay     }
1415119a934aSSatish Balay     break;
1416119a934aSSatish Balay   case 4:
1417119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1418119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1419119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1420119a934aSSatish Balay       ib = idx + ai[i];
1421119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1422119a934aSSatish Balay         rval      = ib[j]*4;
1423bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1424bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1425bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1426bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1427119a934aSSatish Balay         v += 16;
1428119a934aSSatish Balay       }
1429119a934aSSatish Balay     }
1430119a934aSSatish Balay     break;
1431119a934aSSatish Balay   case 5:
1432119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1433119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1434119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1435119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1436119a934aSSatish Balay       ib = idx + ai[i];
1437119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1438119a934aSSatish Balay         rval      = ib[j]*5;
1439bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1440bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1441bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1442bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1443bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1444119a934aSSatish Balay         v += 25;
1445119a934aSSatish Balay       }
1446119a934aSSatish Balay     }
1447119a934aSSatish Balay     break;
1448119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1449119a934aSSatish Balay     default: {
1450119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1451119a934aSSatish Balay       if (!a->mult_work) {
14523b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1453bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1454119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1455119a934aSSatish Balay       }
1456119a934aSSatish Balay       work = a->mult_work;
1457119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1458119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1459119a934aSSatish Balay         ncols = n*bs;
1460119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1461119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1462119a934aSSatish Balay         v += n*bs2;
1463119a934aSSatish Balay         x += bs;
1464119a934aSSatish Balay         workt = work;
1465119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1466119a934aSSatish Balay           zb = z + bs*(*idx++);
1467bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1468119a934aSSatish Balay           workt += bs;
1469119a934aSSatish Balay         }
1470119a934aSSatish Balay       }
1471119a934aSSatish Balay     }
1472119a934aSSatish Balay   }
14730419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
14740419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1475faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1476faf6580fSSatish Balay   return 0;
1477faf6580fSSatish Balay }
14781c351548SSatish Balay 
14795615d1e5SSatish Balay #undef __FUNC__
14805615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1481faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1482faf6580fSSatish Balay {
1483faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1484faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1485faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1486faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1487faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1488faf6580fSSatish Balay 
1489faf6580fSSatish Balay 
1490faf6580fSSatish Balay 
149190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
149290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1493faf6580fSSatish Balay 
1494648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1495648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1496648c1d95SSatish Balay 
1497faf6580fSSatish Balay 
1498faf6580fSSatish Balay   idx   = a->j;
1499faf6580fSSatish Balay   v     = a->a;
1500faf6580fSSatish Balay   ii    = a->i;
1501faf6580fSSatish Balay 
1502faf6580fSSatish Balay   switch (bs) {
1503faf6580fSSatish Balay   case 1:
1504faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1505faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1506faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1507faf6580fSSatish Balay       ib = idx + ai[i];
1508faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1509faf6580fSSatish Balay         rval    = ib[j];
1510faf6580fSSatish Balay         z[rval] += *v++ * x1;
1511faf6580fSSatish Balay       }
1512faf6580fSSatish Balay     }
1513faf6580fSSatish Balay     break;
1514faf6580fSSatish Balay   case 2:
1515faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1516faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1517faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1518faf6580fSSatish Balay       ib = idx + ai[i];
1519faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1520faf6580fSSatish Balay         rval      = ib[j]*2;
1521faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1522faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1523faf6580fSSatish Balay         v += 4;
1524faf6580fSSatish Balay       }
1525faf6580fSSatish Balay     }
1526faf6580fSSatish Balay     break;
1527faf6580fSSatish Balay   case 3:
1528faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1529faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1530faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1531faf6580fSSatish Balay       ib = idx + ai[i];
1532faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1533faf6580fSSatish Balay         rval      = ib[j]*3;
1534faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1535faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1536faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1537faf6580fSSatish Balay         v += 9;
1538faf6580fSSatish Balay       }
1539faf6580fSSatish Balay     }
1540faf6580fSSatish Balay     break;
1541faf6580fSSatish Balay   case 4:
1542faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1543faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1544faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1545faf6580fSSatish Balay       ib = idx + ai[i];
1546faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1547faf6580fSSatish Balay         rval      = ib[j]*4;
1548faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1549faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1550faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1551faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1552faf6580fSSatish Balay         v += 16;
1553faf6580fSSatish Balay       }
1554faf6580fSSatish Balay     }
1555faf6580fSSatish Balay     break;
1556faf6580fSSatish Balay   case 5:
1557faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1558faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1559faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1560faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1561faf6580fSSatish Balay       ib = idx + ai[i];
1562faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1563faf6580fSSatish Balay         rval      = ib[j]*5;
1564faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1565faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1566faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1567faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1568faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1569faf6580fSSatish Balay         v += 25;
1570faf6580fSSatish Balay       }
1571faf6580fSSatish Balay     }
1572faf6580fSSatish Balay     break;
1573faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1574faf6580fSSatish Balay     default: {
1575faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1576faf6580fSSatish Balay       if (!a->mult_work) {
1577faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1578faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1579faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1580faf6580fSSatish Balay       }
1581faf6580fSSatish Balay       work = a->mult_work;
1582faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1583faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1584faf6580fSSatish Balay         ncols = n*bs;
1585faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1586faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1587faf6580fSSatish Balay         v += n*bs2;
1588faf6580fSSatish Balay         x += bs;
1589faf6580fSSatish Balay         workt = work;
1590faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1591faf6580fSSatish Balay           zb = z + bs*(*idx++);
1592faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1593faf6580fSSatish Balay           workt += bs;
1594faf6580fSSatish Balay         }
1595faf6580fSSatish Balay       }
1596faf6580fSSatish Balay     }
1597faf6580fSSatish Balay   }
1598faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1599faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1600faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16012593348eSBarry Smith   return 0;
16022593348eSBarry Smith }
16032593348eSBarry Smith 
16045615d1e5SSatish Balay #undef __FUNC__
16055615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ"
16064e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16072593348eSBarry Smith {
1608b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16094e220ebcSLois Curfman McInnes 
16104e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16114e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16124e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16134e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16144e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16154e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16164e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16174e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16184e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16194e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16204e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16214e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16224e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16234e220ebcSLois Curfman McInnes   if (A->factor) {
16244e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16254e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16264e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16274e220ebcSLois Curfman McInnes   } else {
16284e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16294e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16304e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16314e220ebcSLois Curfman McInnes   }
16322593348eSBarry Smith   return 0;
16332593348eSBarry Smith }
16342593348eSBarry Smith 
16355615d1e5SSatish Balay #undef __FUNC__
16365615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
163791d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
163891d316f6SSatish Balay {
163991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
164091d316f6SSatish Balay 
1641e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
164291d316f6SSatish Balay 
164391d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
164491d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1645a7c10996SSatish Balay       (a->nz != b->nz)) {
164691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
164791d316f6SSatish Balay   }
164891d316f6SSatish Balay 
164991d316f6SSatish Balay   /* if the a->i are the same */
165091d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
165191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
165291d316f6SSatish Balay   }
165391d316f6SSatish Balay 
165491d316f6SSatish Balay   /* if a->j are the same */
165591d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
165691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
165791d316f6SSatish Balay   }
165891d316f6SSatish Balay 
165991d316f6SSatish Balay   /* if a->a are the same */
166091d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
166191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
166291d316f6SSatish Balay   }
166391d316f6SSatish Balay   *flg = PETSC_TRUE;
166491d316f6SSatish Balay   return 0;
166591d316f6SSatish Balay 
166691d316f6SSatish Balay }
166791d316f6SSatish Balay 
16685615d1e5SSatish Balay #undef __FUNC__
16695615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
167091d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
167191d316f6SSatish Balay {
167291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16737e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
167417e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
167517e48fc4SSatish Balay 
16760513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
167717e48fc4SSatish Balay   bs   = a->bs;
167817e48fc4SSatish Balay   aa   = a->a;
167917e48fc4SSatish Balay   ai   = a->i;
168017e48fc4SSatish Balay   aj   = a->j;
168117e48fc4SSatish Balay   ambs = a->mbs;
16829df24120SSatish Balay   bs2  = a->bs2;
168391d316f6SSatish Balay 
168491d316f6SSatish Balay   VecSet(&zero,v);
168590f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1686e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
168717e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
168817e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
168917e48fc4SSatish Balay       if (aj[j] == i) {
169017e48fc4SSatish Balay         row  = i*bs;
16917e67e3f9SSatish Balay         aa_j = aa+j*bs2;
16927e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
169391d316f6SSatish Balay         break;
169491d316f6SSatish Balay       }
169591d316f6SSatish Balay     }
169691d316f6SSatish Balay   }
169791d316f6SSatish Balay   return 0;
169891d316f6SSatish Balay }
169991d316f6SSatish Balay 
17005615d1e5SSatish Balay #undef __FUNC__
17015615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17029867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17039867e207SSatish Balay {
17049867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17059867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17067e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17079867e207SSatish Balay 
17089867e207SSatish Balay   ai  = a->i;
17099867e207SSatish Balay   aj  = a->j;
17109867e207SSatish Balay   aa  = a->a;
17119867e207SSatish Balay   m   = a->m;
17129867e207SSatish Balay   n   = a->n;
17139867e207SSatish Balay   bs  = a->bs;
17149867e207SSatish Balay   mbs = a->mbs;
17159df24120SSatish Balay   bs2 = a->bs2;
17169867e207SSatish Balay   if (ll) {
171790f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1718e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17199867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17209867e207SSatish Balay       M  = ai[i+1] - ai[i];
17219867e207SSatish Balay       li = l + i*bs;
17227e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17239867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17247e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17259867e207SSatish Balay           (*v++) *= li[k%bs];
17269867e207SSatish Balay         }
17279867e207SSatish Balay       }
17289867e207SSatish Balay     }
17299867e207SSatish Balay   }
17309867e207SSatish Balay 
17319867e207SSatish Balay   if (rr) {
173290f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1733e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17349867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17359867e207SSatish Balay       M  = ai[i+1] - ai[i];
17367e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17379867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17389867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17399867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17409867e207SSatish Balay           x = ri[k];
17419867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17429867e207SSatish Balay         }
17439867e207SSatish Balay       }
17449867e207SSatish Balay     }
17459867e207SSatish Balay   }
17469867e207SSatish Balay   return 0;
17479867e207SSatish Balay }
17489867e207SSatish Balay 
17499867e207SSatish Balay 
1750b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1751b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17522a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1753736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1754736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17551a6a6d98SBarry Smith 
17561a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17571a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17581a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
17591a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
17601a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
17611a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
176248e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
17631a6a6d98SBarry Smith 
17647fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
17657fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
17667fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
17677fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
17687fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
17697fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
17702593348eSBarry Smith 
17715615d1e5SSatish Balay #undef __FUNC__
17725615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
1773b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
17742593348eSBarry Smith {
1775b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17762593348eSBarry Smith   Scalar      *v = a->a;
17772593348eSBarry Smith   double      sum = 0.0;
17789df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
17792593348eSBarry Smith 
17802593348eSBarry Smith   if (type == NORM_FROBENIUS) {
17810419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
17822593348eSBarry Smith #if defined(PETSC_COMPLEX)
17832593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
17842593348eSBarry Smith #else
17852593348eSBarry Smith       sum += (*v)*(*v); v++;
17862593348eSBarry Smith #endif
17872593348eSBarry Smith     }
17882593348eSBarry Smith     *norm = sqrt(sum);
17892593348eSBarry Smith   }
17902593348eSBarry Smith   else {
1791e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
17922593348eSBarry Smith   }
17932593348eSBarry Smith   return 0;
17942593348eSBarry Smith }
17952593348eSBarry Smith 
17962593348eSBarry Smith /*
17972593348eSBarry Smith      note: This can only work for identity for row and col. It would
17982593348eSBarry Smith    be good to check this and otherwise generate an error.
17992593348eSBarry Smith */
18005615d1e5SSatish Balay #undef __FUNC__
18015615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
1802b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18032593348eSBarry Smith {
1804b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18052593348eSBarry Smith   Mat         outA;
1806de6a44a3SBarry Smith   int         ierr;
18072593348eSBarry Smith 
1808e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18092593348eSBarry Smith 
18102593348eSBarry Smith   outA          = inA;
18112593348eSBarry Smith   inA->factor   = FACTOR_LU;
18122593348eSBarry Smith   a->row        = row;
18132593348eSBarry Smith   a->col        = col;
18142593348eSBarry Smith 
1815de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
18162593348eSBarry Smith 
18172593348eSBarry Smith   if (!a->diag) {
1818b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18192593348eSBarry Smith   }
18207fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18212593348eSBarry Smith   return 0;
18222593348eSBarry Smith }
18232593348eSBarry Smith 
18245615d1e5SSatish Balay #undef __FUNC__
18255615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
1826b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18272593348eSBarry Smith {
1828b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18299df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1830b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1831b6490206SBarry Smith   PLogFlops(totalnz);
18322593348eSBarry Smith   return 0;
18332593348eSBarry Smith }
18342593348eSBarry Smith 
18355615d1e5SSatish Balay #undef __FUNC__
18365615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18377e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18387e67e3f9SSatish Balay {
18397e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18407e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1841a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18429df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18437e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18447e67e3f9SSatish Balay 
18457e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
18467e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1847e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1848e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1849a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
18507e67e3f9SSatish Balay     nrow = ailen[brow];
18517e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1852e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1853e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1854a7c10996SSatish Balay       col  = in[l] ;
18557e67e3f9SSatish Balay       bcol = col/bs;
18567e67e3f9SSatish Balay       cidx = col%bs;
18577e67e3f9SSatish Balay       ridx = row%bs;
18587e67e3f9SSatish Balay       high = nrow;
18597e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
18607e67e3f9SSatish Balay       while (high-low > 5) {
18617e67e3f9SSatish Balay         t = (low+high)/2;
18627e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
18637e67e3f9SSatish Balay         else             low  = t;
18647e67e3f9SSatish Balay       }
18657e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
18667e67e3f9SSatish Balay         if (rp[i] > bcol) break;
18677e67e3f9SSatish Balay         if (rp[i] == bcol) {
18687e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18697e67e3f9SSatish Balay           goto finished;
18707e67e3f9SSatish Balay         }
18717e67e3f9SSatish Balay       }
18727e67e3f9SSatish Balay       *v++ = zero;
18737e67e3f9SSatish Balay       finished:;
18747e67e3f9SSatish Balay     }
18757e67e3f9SSatish Balay   }
18767e67e3f9SSatish Balay   return 0;
18777e67e3f9SSatish Balay }
18787e67e3f9SSatish Balay 
18795615d1e5SSatish Balay #undef __FUNC__
18805615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
18815a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
18825a838052SSatish Balay {
18835a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
18845a838052SSatish Balay   *bs = baij->bs;
18855a838052SSatish Balay   return 0;
18865a838052SSatish Balay }
18875a838052SSatish Balay 
1888d9b7c43dSSatish Balay /* idx should be of length atlease bs */
18895615d1e5SSatish Balay #undef __FUNC__
18905615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1891d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1892d9b7c43dSSatish Balay {
1893d9b7c43dSSatish Balay   int i,row;
1894d9b7c43dSSatish Balay   row = idx[0];
1895d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1896d9b7c43dSSatish Balay 
1897d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1898d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1899d9b7c43dSSatish Balay   }
1900d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1901d9b7c43dSSatish Balay   return 0;
1902d9b7c43dSSatish Balay }
1903d9b7c43dSSatish Balay 
19045615d1e5SSatish Balay #undef __FUNC__
19055615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
1906d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1907d9b7c43dSSatish Balay {
1908d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1909d9b7c43dSSatish Balay   IS          is_local;
1910d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1911d9b7c43dSSatish Balay   PetscTruth  flg;
1912d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1913d9b7c43dSSatish Balay 
1914d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1915d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1916d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1917537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1918d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1919d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1920d9b7c43dSSatish Balay 
1921d9b7c43dSSatish Balay   i = 0;
1922d9b7c43dSSatish Balay   while (i < is_n) {
1923e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1924d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1925d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1926d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1927d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1928d9b7c43dSSatish Balay       i += bs;
1929d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1930d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1931d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1932d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1933d9b7c43dSSatish Balay         aa[0] = zero;
1934d9b7c43dSSatish Balay         aa+=bs;
1935d9b7c43dSSatish Balay       }
1936d9b7c43dSSatish Balay       i++;
1937d9b7c43dSSatish Balay     }
1938d9b7c43dSSatish Balay   }
1939d9b7c43dSSatish Balay   if (diag) {
1940d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1941d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1942d9b7c43dSSatish Balay     }
1943d9b7c43dSSatish Balay   }
1944d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1945d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1946d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
19479a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1948d9b7c43dSSatish Balay 
1949d9b7c43dSSatish Balay   return 0;
1950d9b7c43dSSatish Balay }
19511c351548SSatish Balay 
19525615d1e5SSatish Balay #undef __FUNC__
19535615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1954ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1955ba4ca20aSSatish Balay {
1956ba4ca20aSSatish Balay   static int called = 0;
1957ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1958ba4ca20aSSatish Balay 
1959ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1960ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1961ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1962ba4ca20aSSatish Balay   return 0;
1963ba4ca20aSSatish Balay }
1964d9b7c43dSSatish Balay 
19652593348eSBarry Smith /* -------------------------------------------------------------------*/
1966cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
19679867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1968f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1969faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
19701a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1971b6490206SBarry Smith        0,0,
1972de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1973b6490206SBarry Smith        0,
1974f2501298SSatish Balay        MatTranspose_SeqBAIJ,
197517e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
19769867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1977584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1978b6490206SBarry Smith        0,
1979d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
19807fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1981b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1982de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1983d402145bSBarry Smith        0,0,
1984b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1985b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1986af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
19877e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1988ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
19893b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
19903b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
199192c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
199292c4ed94SBarry Smith        0,0,0,0,0,0,
199392c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
19942593348eSBarry Smith 
19955615d1e5SSatish Balay #undef __FUNC__
19965615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
19972593348eSBarry Smith /*@C
199844cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
199944cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
200044cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20012bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20022bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20032593348eSBarry Smith 
20042593348eSBarry Smith    Input Parameters:
20052593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
2006b6490206SBarry Smith .  bs - size of block
20072593348eSBarry Smith .  m - number of rows
20082593348eSBarry Smith .  n - number of columns
2009b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20102bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20112bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20122593348eSBarry Smith 
20132593348eSBarry Smith    Output Parameter:
20142593348eSBarry Smith .  A - the matrix
20152593348eSBarry Smith 
20160513a670SBarry Smith    Options Database Keys:
20170513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20180513a670SBarry Smith $                     block calculations (much solver)
20190513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20200513a670SBarry Smith 
20212593348eSBarry Smith    Notes:
202244cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20232593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
202444cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20252593348eSBarry Smith 
20262593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20272593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20282593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20296da5968aSLois Curfman McInnes    matrices.
20302593348eSBarry Smith 
203144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20322593348eSBarry Smith @*/
2033b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20342593348eSBarry Smith {
20352593348eSBarry Smith   Mat         B;
2036b6490206SBarry Smith   Mat_SeqBAIJ *b;
20373b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20383b2fbd54SBarry Smith 
20393b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2040e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2041b6490206SBarry Smith 
20420513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20430513a670SBarry Smith 
2044f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2045e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
20462593348eSBarry Smith 
20472593348eSBarry Smith   *A = 0;
2048b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
20492593348eSBarry Smith   PLogObjectCreate(B);
2050b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
205144cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
20522593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
20531a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
20541a6a6d98SBarry Smith   if (!flg) {
20557fc0212eSBarry Smith     switch (bs) {
20567fc0212eSBarry Smith     case 1:
20577fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20581a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
205939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2060f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
20617fc0212eSBarry Smith       break;
20624eeb42bcSBarry Smith     case 2:
20634eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
20641a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
206539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2066f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
20676d84be18SBarry Smith       break;
2068f327dd97SBarry Smith     case 3:
2069f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
20701a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
207139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2072f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
20734eeb42bcSBarry Smith       break;
20746d84be18SBarry Smith     case 4:
20756d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
20761a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
207739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2078f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
20796d84be18SBarry Smith       break;
20806d84be18SBarry Smith     case 5:
20816d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
20821a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
208339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2084f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
20856d84be18SBarry Smith       break;
208648e9ddb2SSatish Balay     case 7:
208748e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
208848e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
208948e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
209048e9ddb2SSatish Balay       break;
20917fc0212eSBarry Smith     }
20921a6a6d98SBarry Smith   }
2093b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2094b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
20952593348eSBarry Smith   B->factor           = 0;
20962593348eSBarry Smith   B->lupivotthreshold = 1.0;
209790f02eecSBarry Smith   B->mapping          = 0;
20982593348eSBarry Smith   b->row              = 0;
20992593348eSBarry Smith   b->col              = 0;
21002593348eSBarry Smith   b->reallocs         = 0;
21012593348eSBarry Smith 
210244cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
210344cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2104b6490206SBarry Smith   b->mbs     = mbs;
2105f2501298SSatish Balay   b->nbs     = nbs;
2106b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21072593348eSBarry Smith   if (nnz == PETSC_NULL) {
2108119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21092593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2110b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2111b6490206SBarry Smith     nz = nz*mbs;
21122593348eSBarry Smith   }
21132593348eSBarry Smith   else {
21142593348eSBarry Smith     nz = 0;
2115b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21162593348eSBarry Smith   }
21172593348eSBarry Smith 
21182593348eSBarry Smith   /* allocate the matrix space */
21197e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21202593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21217e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21227e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21232593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21242593348eSBarry Smith   b->i  = b->j + nz;
21252593348eSBarry Smith   b->singlemalloc = 1;
21262593348eSBarry Smith 
2127de6a44a3SBarry Smith   b->i[0] = 0;
2128b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21292593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21302593348eSBarry Smith   }
21312593348eSBarry Smith 
2132b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2133b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2134b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2135b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21362593348eSBarry Smith 
2137b6490206SBarry Smith   b->bs               = bs;
21389df24120SSatish Balay   b->bs2              = bs2;
2139b6490206SBarry Smith   b->mbs              = mbs;
21402593348eSBarry Smith   b->nz               = 0;
214118e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21422593348eSBarry Smith   b->sorted           = 0;
21432593348eSBarry Smith   b->roworiented      = 1;
21442593348eSBarry Smith   b->nonew            = 0;
21452593348eSBarry Smith   b->diag             = 0;
21462593348eSBarry Smith   b->solve_work       = 0;
2147de6a44a3SBarry Smith   b->mult_work        = 0;
21482593348eSBarry Smith   b->spptr            = 0;
21494e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
21504e220ebcSLois Curfman McInnes 
21512593348eSBarry Smith   *A = B;
21522593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
21532593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
21542593348eSBarry Smith   return 0;
21552593348eSBarry Smith }
21562593348eSBarry Smith 
21575615d1e5SSatish Balay #undef __FUNC__
21585615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2159b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
21602593348eSBarry Smith {
21612593348eSBarry Smith   Mat         C;
2162b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
21639df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2164de6a44a3SBarry Smith 
2165e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
21662593348eSBarry Smith 
21672593348eSBarry Smith   *B = 0;
2168b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
21692593348eSBarry Smith   PLogObjectCreate(C);
2170b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
21712593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2172b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2173b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
21742593348eSBarry Smith   C->factor     = A->factor;
21752593348eSBarry Smith   c->row        = 0;
21762593348eSBarry Smith   c->col        = 0;
21772593348eSBarry Smith   C->assembled  = PETSC_TRUE;
21782593348eSBarry Smith 
217944cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
218044cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
218144cd7ae7SLois Curfman McInnes   C->M          = a->m;
218244cd7ae7SLois Curfman McInnes   C->N          = a->n;
218344cd7ae7SLois Curfman McInnes 
2184b6490206SBarry Smith   c->bs         = a->bs;
21859df24120SSatish Balay   c->bs2        = a->bs2;
2186b6490206SBarry Smith   c->mbs        = a->mbs;
2187b6490206SBarry Smith   c->nbs        = a->nbs;
21882593348eSBarry Smith 
2189b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2190b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2191b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
21922593348eSBarry Smith     c->imax[i] = a->imax[i];
21932593348eSBarry Smith     c->ilen[i] = a->ilen[i];
21942593348eSBarry Smith   }
21952593348eSBarry Smith 
21962593348eSBarry Smith   /* allocate the matrix space */
21972593348eSBarry Smith   c->singlemalloc = 1;
21987e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
21992593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22007e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2201de6a44a3SBarry Smith   c->i  = c->j + nz;
2202b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2203b6490206SBarry Smith   if (mbs > 0) {
2204de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22052593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22067e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22072593348eSBarry Smith     }
22082593348eSBarry Smith   }
22092593348eSBarry Smith 
2210b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
22112593348eSBarry Smith   c->sorted      = a->sorted;
22122593348eSBarry Smith   c->roworiented = a->roworiented;
22132593348eSBarry Smith   c->nonew       = a->nonew;
22142593348eSBarry Smith 
22152593348eSBarry Smith   if (a->diag) {
2216b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2217b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2218b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22192593348eSBarry Smith       c->diag[i] = a->diag[i];
22202593348eSBarry Smith     }
22212593348eSBarry Smith   }
22222593348eSBarry Smith   else c->diag          = 0;
22232593348eSBarry Smith   c->nz                 = a->nz;
22242593348eSBarry Smith   c->maxnz              = a->maxnz;
22252593348eSBarry Smith   c->solve_work         = 0;
22262593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22277fc0212eSBarry Smith   c->mult_work          = 0;
22282593348eSBarry Smith   *B = C;
22292593348eSBarry Smith   return 0;
22302593348eSBarry Smith }
22312593348eSBarry Smith 
22325615d1e5SSatish Balay #undef __FUNC__
22335615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
223419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22352593348eSBarry Smith {
2236b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22372593348eSBarry Smith   Mat          B;
2238de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2239b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
224035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2241a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2242b6490206SBarry Smith   Scalar       *aa;
224319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22442593348eSBarry Smith 
22451a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2246de6a44a3SBarry Smith   bs2  = bs*bs;
2247b6490206SBarry Smith 
22482593348eSBarry Smith   MPI_Comm_size(comm,&size);
2249e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
225090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
225177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2252e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
22532593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
22542593348eSBarry Smith 
2255e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
225635aab85fSBarry Smith 
225735aab85fSBarry Smith   /*
225835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
225935aab85fSBarry Smith     divisible by the blocksize
226035aab85fSBarry Smith   */
2261b6490206SBarry Smith   mbs        = M/bs;
226235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
226335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
226435aab85fSBarry Smith   else                  mbs++;
226535aab85fSBarry Smith   if (extra_rows) {
2266537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
226735aab85fSBarry Smith   }
2268b6490206SBarry Smith 
22692593348eSBarry Smith   /* read in row lengths */
227035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
227177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
227235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
22732593348eSBarry Smith 
2274b6490206SBarry Smith   /* read in column indices */
227535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
227677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
227735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2278b6490206SBarry Smith 
2279b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2280b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2281b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
228235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
228335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
228435aab85fSBarry Smith   masked = mask + mbs;
2285b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2286b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
228735aab85fSBarry Smith     nmask = 0;
2288b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2289b6490206SBarry Smith       kmax = rowlengths[rowcount];
2290b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
229135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
229235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2293b6490206SBarry Smith       }
2294b6490206SBarry Smith       rowcount++;
2295b6490206SBarry Smith     }
229635aab85fSBarry Smith     browlengths[i] += nmask;
229735aab85fSBarry Smith     /* zero out the mask elements we set */
229835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2299b6490206SBarry Smith   }
2300b6490206SBarry Smith 
23012593348eSBarry Smith   /* create our matrix */
230235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
230335aab85fSBarry Smith          CHKERRQ(ierr);
23042593348eSBarry Smith   B = *A;
2305b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23062593348eSBarry Smith 
23072593348eSBarry Smith   /* set matrix "i" values */
2308de6a44a3SBarry Smith   a->i[0] = 0;
2309b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2310b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2311b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23122593348eSBarry Smith   }
2313b6490206SBarry Smith   a->nz         = 0;
2314b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23152593348eSBarry Smith 
2316b6490206SBarry Smith   /* read in nonzero values */
231735aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
231877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
231935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2320b6490206SBarry Smith 
2321b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2322b6490206SBarry Smith   nzcount = 0; jcount = 0;
2323b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2324b6490206SBarry Smith     nzcountb = nzcount;
232535aab85fSBarry Smith     nmask    = 0;
2326b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2327b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2328b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
232935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
233035aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2331b6490206SBarry Smith       }
2332b6490206SBarry Smith       rowcount++;
2333b6490206SBarry Smith     }
2334de6a44a3SBarry Smith     /* sort the masked values */
233577c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2336de6a44a3SBarry Smith 
2337b6490206SBarry Smith     /* set "j" values into matrix */
2338b6490206SBarry Smith     maskcount = 1;
233935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
234035aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2341de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2342b6490206SBarry Smith     }
2343b6490206SBarry Smith     /* set "a" values into matrix */
2344de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2345b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2346b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2347b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2348de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2349de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2350de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2351de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2352b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2353b6490206SBarry Smith       }
2354b6490206SBarry Smith     }
235535aab85fSBarry Smith     /* zero out the mask elements we set */
235635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2357b6490206SBarry Smith   }
2358e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2359b6490206SBarry Smith 
2360b6490206SBarry Smith   PetscFree(rowlengths);
2361b6490206SBarry Smith   PetscFree(browlengths);
2362b6490206SBarry Smith   PetscFree(aa);
2363b6490206SBarry Smith   PetscFree(jj);
2364b6490206SBarry Smith   PetscFree(mask);
2365b6490206SBarry Smith 
2366b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2367de6a44a3SBarry Smith 
2368de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2369de6a44a3SBarry Smith   if (flg) {
237019bcc07fSBarry Smith     Viewer tviewer;
237119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2372639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
237319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
237419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2375de6a44a3SBarry Smith   }
2376de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2377de6a44a3SBarry Smith   if (flg) {
237819bcc07fSBarry Smith     Viewer tviewer;
237919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2380639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
238119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
238219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2383de6a44a3SBarry Smith   }
2384de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2385de6a44a3SBarry Smith   if (flg) {
238619bcc07fSBarry Smith     Viewer tviewer;
238719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
238819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
238919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2390de6a44a3SBarry Smith   }
2391de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2392de6a44a3SBarry Smith   if (flg) {
239319bcc07fSBarry Smith     Viewer tviewer;
239419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2395639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
239619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
239719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2398de6a44a3SBarry Smith   }
2399de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2400de6a44a3SBarry Smith   if (flg) {
240119bcc07fSBarry Smith     Viewer tviewer;
240219bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
240319bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
240419bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
240519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2406de6a44a3SBarry Smith   }
24072593348eSBarry Smith   return 0;
24082593348eSBarry Smith }
24092593348eSBarry Smith 
24102593348eSBarry Smith 
24112593348eSBarry Smith 
2412