xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 48e9ddb25f20e29452dc97febc8142938f1b72f7)
12593348eSBarry Smith #ifndef lint
2*48e9ddb2SSatish Balay static char vcid[] = "$Id: baij.c,v 1.87 1997/02/03 20:15:40 bsmith Exp balay $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
195615d1e5SSatish Balay #undef __FUNC__
205615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
22de6a44a3SBarry Smith {
23de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
247fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
25de6a44a3SBarry Smith 
26de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
27de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
287fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
29de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
30de6a44a3SBarry Smith       if (a->j[j] == i) {
31de6a44a3SBarry Smith         diag[i] = j;
32de6a44a3SBarry Smith         break;
33de6a44a3SBarry Smith       }
34de6a44a3SBarry Smith     }
35de6a44a3SBarry Smith   }
36de6a44a3SBarry Smith   a->diag = diag;
37de6a44a3SBarry Smith   return 0;
38de6a44a3SBarry Smith }
392593348eSBarry Smith 
402593348eSBarry Smith #include "draw.h"
412593348eSBarry Smith #include "pinclude/pviewer.h"
4277c4ece6SBarry Smith #include "sys.h"
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
935615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
11577c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
1197e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
1307e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
1347e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
1457e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
1515615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes #else
18144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18344cd7ae7SLois Curfman McInnes #endif
18444cd7ae7SLois Curfman McInnes           }
18544cd7ae7SLois Curfman McInnes         }
18644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18744cd7ae7SLois Curfman McInnes       }
18844cd7ae7SLois Curfman McInnes     }
18944cd7ae7SLois Curfman McInnes   }
1902593348eSBarry Smith   else {
191b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
192b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
193b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
194b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
195b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1977e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1997e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20088685aaeSLois Curfman McInnes           }
20188685aaeSLois Curfman McInnes           else {
2027e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes #else
2057e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20688685aaeSLois Curfman McInnes #endif
2072593348eSBarry Smith           }
2082593348eSBarry Smith         }
2092593348eSBarry Smith         fprintf(fd,"\n");
2102593348eSBarry Smith       }
2112593348eSBarry Smith     }
212b6490206SBarry Smith   }
2132593348eSBarry Smith   fflush(fd);
2142593348eSBarry Smith   return 0;
2152593348eSBarry Smith }
2162593348eSBarry Smith 
2175615d1e5SSatish Balay #undef __FUNC__
2185615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw"
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
3445615d1e5SSatish Balay #undef __FUNC__
3455615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ"
346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3472593348eSBarry Smith {
3482593348eSBarry Smith   Mat         A = (Mat) obj;
34919bcc07fSBarry Smith   ViewerType  vtype;
35019bcc07fSBarry Smith   int         ierr;
3512593348eSBarry Smith 
35219bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35319bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
354e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
357b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
360b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3612593348eSBarry Smith   }
36219bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3633270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3642593348eSBarry Smith   }
3652593348eSBarry Smith   return 0;
3662593348eSBarry Smith }
367b6490206SBarry Smith 
368119a934aSSatish Balay #define CHUNKSIZE  10
369cd0e1443SSatish Balay 
3705615d1e5SSatish Balay #undef __FUNC__
3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
373cd0e1443SSatish Balay {
374cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
375cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
376cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
377a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3789df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
379cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
380cd0e1443SSatish Balay 
381cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
382cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
384e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
385e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3863b2fbd54SBarry Smith #endif
3872c3acbe9SBarry Smith     rp   = aj + ai[brow];
3882c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3892c3acbe9SBarry Smith     rmax = imax[brow];
3902c3acbe9SBarry Smith     nrow = ailen[brow];
391cd0e1443SSatish Balay     low  = 0;
392cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
394e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
3963b2fbd54SBarry Smith #endif
397a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
398cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
399cd0e1443SSatish Balay       if (roworiented) {
400cd0e1443SSatish Balay         value = *v++;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       else {
403cd0e1443SSatish Balay         value = v[k + l*m];
404cd0e1443SSatish Balay       }
405cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4062c3acbe9SBarry Smith       while (high-low > 7) {
407cd0e1443SSatish Balay         t = (low+high)/2;
408cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
409cd0e1443SSatish Balay         else              low  = t;
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
412cd0e1443SSatish Balay         if (rp[i] > bcol) break;
413cd0e1443SSatish Balay         if (rp[i] == bcol) {
4147e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
415cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
416cd0e1443SSatish Balay           else                  *bap  = value;
417cd0e1443SSatish Balay           goto noinsert;
418cd0e1443SSatish Balay         }
419cd0e1443SSatish Balay       }
420cd0e1443SSatish Balay       if (nonew) goto noinsert;
421cd0e1443SSatish Balay       if (nrow >= rmax) {
422cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
423cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
424cd0e1443SSatish Balay         Scalar *new_a;
425cd0e1443SSatish Balay 
426cd0e1443SSatish Balay         /* malloc new storage space */
4277e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
428cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4297e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
430cd0e1443SSatish Balay         new_i   = new_j + new_nz;
431cd0e1443SSatish Balay 
432cd0e1443SSatish Balay         /* copy over old data into new slots */
433cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4347e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
435a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
436a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
437a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
438cd0e1443SSatish Balay                                                            len*sizeof(int));
439a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
440a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
441a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
442a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
443cd0e1443SSatish Balay         /* free up old matrix storage */
444cd0e1443SSatish Balay         PetscFree(a->a);
445cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
446cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
447cd0e1443SSatish Balay         a->singlemalloc = 1;
448cd0e1443SSatish Balay 
449a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
450cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4517e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
45218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
453cd0e1443SSatish Balay         a->reallocs++;
454119a934aSSatish Balay         a->nz++;
455cd0e1443SSatish Balay       }
4567e67e3f9SSatish Balay       N = nrow++ - 1;
457cd0e1443SSatish Balay       /* shift up all the later entries in this row */
458cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
459cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4607e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
461cd0e1443SSatish Balay       }
4627e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
463cd0e1443SSatish Balay       rp[i]                      = bcol;
4647e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
465cd0e1443SSatish Balay       noinsert:;
466cd0e1443SSatish Balay       low = i;
467cd0e1443SSatish Balay     }
468cd0e1443SSatish Balay     ailen[brow] = nrow;
469cd0e1443SSatish Balay   }
470cd0e1443SSatish Balay   return 0;
471cd0e1443SSatish Balay }
472cd0e1443SSatish Balay 
4735615d1e5SSatish Balay #undef __FUNC__
47492c4ed94SBarry Smith #define __FUNC__ "MatSetValues_SeqBAIJ"
47592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
47692c4ed94SBarry Smith {
47792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
47892c4ed94SBarry Smith   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
47992c4ed94SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
48092c4ed94SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs=a->bs;
48192c4ed94SBarry Smith   int          ridx,cidx,bs2=a->bs2;
48292c4ed94SBarry Smith   Scalar      *ap,value,*aa=a->a,*bap;
48392c4ed94SBarry Smith 
48492c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
48592c4ed94SBarry Smith     row  = im[k];
48692c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
48792c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
48892c4ed94SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
48992c4ed94SBarry Smith #endif
49092c4ed94SBarry Smith     rp   = aj + ai[row];
49192c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
49292c4ed94SBarry Smith     rmax = imax[row];
49392c4ed94SBarry Smith     nrow = ailen[row];
49492c4ed94SBarry Smith     low  = 0;
49592c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
49692c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
49792c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
49892c4ed94SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
49992c4ed94SBarry Smith #endif
50092c4ed94SBarry Smith       col = in[l];
50192c4ed94SBarry Smith       ridx = row % bs; cidx = col % bs;
50292c4ed94SBarry Smith       if (roworiented) {
50392c4ed94SBarry Smith         value = *v++;
50492c4ed94SBarry Smith       }
50592c4ed94SBarry Smith       else {
50692c4ed94SBarry Smith         value = v[k + l*m];
50792c4ed94SBarry Smith       }
50892c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
50992c4ed94SBarry Smith       while (high-low > 7) {
51092c4ed94SBarry Smith         t = (low+high)/2;
51192c4ed94SBarry Smith         if (rp[t] > col) high = t;
51292c4ed94SBarry Smith         else             low  = t;
51392c4ed94SBarry Smith       }
51492c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
51592c4ed94SBarry Smith         if (rp[i] > col) break;
51692c4ed94SBarry Smith         if (rp[i] == col) {
51792c4ed94SBarry Smith           bap  = ap +  bs2*i + bs*cidx + ridx;
51892c4ed94SBarry Smith           if (is == ADD_VALUES) *bap += value;
51992c4ed94SBarry Smith           else                  *bap  = value;
52092c4ed94SBarry Smith           goto noinsert;
52192c4ed94SBarry Smith         }
52292c4ed94SBarry Smith       }
52392c4ed94SBarry Smith       if (nonew) goto noinsert;
52492c4ed94SBarry Smith       if (nrow >= rmax) {
52592c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
52692c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
52792c4ed94SBarry Smith         Scalar *new_a;
52892c4ed94SBarry Smith 
52992c4ed94SBarry Smith         /* malloc new storage space */
53092c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
53192c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
53292c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
53392c4ed94SBarry Smith         new_i   = new_j + new_nz;
53492c4ed94SBarry Smith 
53592c4ed94SBarry Smith         /* copy over old data into new slots */
53692c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
53792c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
53892c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
53992c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
54092c4ed94SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,
54192c4ed94SBarry Smith                                                            len*sizeof(int));
54292c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
54392c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
54492c4ed94SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),
54592c4ed94SBarry Smith                     aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
54692c4ed94SBarry Smith         /* free up old matrix storage */
54792c4ed94SBarry Smith         PetscFree(a->a);
54892c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
54992c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
55092c4ed94SBarry Smith         a->singlemalloc = 1;
55192c4ed94SBarry Smith 
55292c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
55392c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
55492c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
55592c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
55692c4ed94SBarry Smith         a->reallocs++;
55792c4ed94SBarry Smith         a->nz++;
55892c4ed94SBarry Smith       }
55992c4ed94SBarry Smith       N = nrow++ - 1;
56092c4ed94SBarry Smith       /* shift up all the later entries in this row */
56192c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
56292c4ed94SBarry Smith         rp[ii+1] = rp[ii];
56392c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
56492c4ed94SBarry Smith       }
56592c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
56692c4ed94SBarry Smith       rp[i]                      = col;
56792c4ed94SBarry Smith       ap[bs2*i + bs*cidx + ridx] = value;
56892c4ed94SBarry Smith       noinsert:;
56992c4ed94SBarry Smith       low = i;
57092c4ed94SBarry Smith     }
57192c4ed94SBarry Smith     ailen[row] = nrow;
57292c4ed94SBarry Smith   }
57392c4ed94SBarry Smith   return 0;
57492c4ed94SBarry Smith }
57592c4ed94SBarry Smith 
57692c4ed94SBarry Smith #undef __FUNC__
5775615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ"
5780b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
5790b824a48SSatish Balay {
5800b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5810b824a48SSatish Balay   *m = a->m; *n = a->n;
5820b824a48SSatish Balay   return 0;
5830b824a48SSatish Balay }
5840b824a48SSatish Balay 
5855615d1e5SSatish Balay #undef __FUNC__
5865615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
5870b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
5880b824a48SSatish Balay {
5890b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5900b824a48SSatish Balay   *m = 0; *n = a->m;
5910b824a48SSatish Balay   return 0;
5920b824a48SSatish Balay }
5930b824a48SSatish Balay 
5945615d1e5SSatish Balay #undef __FUNC__
5955615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
5969867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5979867e207SSatish Balay {
5989867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5997e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6009867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6019867e207SSatish Balay 
6029867e207SSatish Balay   bs  = a->bs;
6039867e207SSatish Balay   ai  = a->i;
6049867e207SSatish Balay   aj  = a->j;
6059867e207SSatish Balay   aa  = a->a;
6069df24120SSatish Balay   bs2 = a->bs2;
6079867e207SSatish Balay 
608e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6099867e207SSatish Balay 
6109867e207SSatish Balay   bn  = row/bs;   /* Block number */
6119867e207SSatish Balay   bp  = row % bs; /* Block Position */
6129867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6139867e207SSatish Balay   *nz = bs*M;
6149867e207SSatish Balay 
6159867e207SSatish Balay   if (v) {
6169867e207SSatish Balay     *v = 0;
6179867e207SSatish Balay     if (*nz) {
6189867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6199867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6209867e207SSatish Balay         v_i  = *v + i*bs;
6217e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6227e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6239867e207SSatish Balay       }
6249867e207SSatish Balay     }
6259867e207SSatish Balay   }
6269867e207SSatish Balay 
6279867e207SSatish Balay   if (idx) {
6289867e207SSatish Balay     *idx = 0;
6299867e207SSatish Balay     if (*nz) {
6309867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6319867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6329867e207SSatish Balay         idx_i = *idx + i*bs;
6339867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6349867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6359867e207SSatish Balay       }
6369867e207SSatish Balay     }
6379867e207SSatish Balay   }
6389867e207SSatish Balay   return 0;
6399867e207SSatish Balay }
6409867e207SSatish Balay 
6415615d1e5SSatish Balay #undef __FUNC__
6425615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6439867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6449867e207SSatish Balay {
6459867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
6469867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
6479867e207SSatish Balay   return 0;
6489867e207SSatish Balay }
649b6490206SBarry Smith 
6505615d1e5SSatish Balay #undef __FUNC__
6515615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
652f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
653f2501298SSatish Balay {
654f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
655f2501298SSatish Balay   Mat         C;
656f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
6579df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
658f2501298SSatish Balay   Scalar      *array=a->a;
659f2501298SSatish Balay 
660f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
661e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
662f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
663f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
664a7c10996SSatish Balay 
665a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
666f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
667f2501298SSatish Balay   PetscFree(col);
668f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
669f2501298SSatish Balay   cols = rows + bs;
670f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
671f2501298SSatish Balay     cols[0] = i*bs;
672f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
673f2501298SSatish Balay     len = ai[i+1] - ai[i];
674f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
675f2501298SSatish Balay       rows[0] = (*aj++)*bs;
676f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
677f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
678f2501298SSatish Balay       array += bs2;
679f2501298SSatish Balay     }
680f2501298SSatish Balay   }
6811073c447SSatish Balay   PetscFree(rows);
682f2501298SSatish Balay 
6836d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6846d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
685f2501298SSatish Balay 
686f2501298SSatish Balay   if (B != PETSC_NULL) {
687f2501298SSatish Balay     *B = C;
688f2501298SSatish Balay   } else {
689f2501298SSatish Balay     /* This isn't really an in-place transpose */
690f2501298SSatish Balay     PetscFree(a->a);
691f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
692f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
693f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
694f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
695f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
696f2501298SSatish Balay     PetscFree(a);
697f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
698f2501298SSatish Balay     PetscHeaderDestroy(C);
699f2501298SSatish Balay   }
700f2501298SSatish Balay   return 0;
701f2501298SSatish Balay }
702f2501298SSatish Balay 
703f2501298SSatish Balay 
7045615d1e5SSatish Balay #undef __FUNC__
7055615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
706584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
707584200bdSSatish Balay {
708584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
709584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
710a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
711d402145bSBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax;
712584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
713584200bdSSatish Balay 
7146d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
715584200bdSSatish Balay 
716d402145bSBarry Smith   rmax = ailen[0];
717584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
718584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
719584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
720d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
721584200bdSSatish Balay     if (fshift) {
722a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
723584200bdSSatish Balay       N = ailen[i];
724584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
725584200bdSSatish Balay         ip[j-fshift] = ip[j];
7267e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
727584200bdSSatish Balay       }
728584200bdSSatish Balay     }
729584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
730584200bdSSatish Balay   }
731584200bdSSatish Balay   if (mbs) {
732584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
733584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
734584200bdSSatish Balay   }
735584200bdSSatish Balay   /* reset ilen and imax for each row */
736584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
737584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
738584200bdSSatish Balay   }
739a7c10996SSatish Balay   a->nz = ai[mbs];
740584200bdSSatish Balay 
741584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
742584200bdSSatish Balay   if (fshift && a->diag) {
743584200bdSSatish Balay     PetscFree(a->diag);
744584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
745584200bdSSatish Balay     a->diag = 0;
746584200bdSSatish Balay   }
7473d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
748219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
7493d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
750584200bdSSatish Balay            a->reallocs);
751d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
752e2f3b5e9SSatish Balay   a->reallocs          = 0;
7534e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
7544e220ebcSLois Curfman McInnes 
755584200bdSSatish Balay   return 0;
756584200bdSSatish Balay }
757584200bdSSatish Balay 
7585615d1e5SSatish Balay #undef __FUNC__
7595615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
760b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
7612593348eSBarry Smith {
762b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7639df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
7642593348eSBarry Smith   return 0;
7652593348eSBarry Smith }
7662593348eSBarry Smith 
7675615d1e5SSatish Balay #undef __FUNC__
7685615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ"
769b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
7702593348eSBarry Smith {
7712593348eSBarry Smith   Mat         A  = (Mat) obj;
772b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
77390f02eecSBarry Smith   int         ierr;
7742593348eSBarry Smith 
7752593348eSBarry Smith #if defined(PETSC_LOG)
7762593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
7772593348eSBarry Smith #endif
7782593348eSBarry Smith   PetscFree(a->a);
7792593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
7802593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
7812593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
7822593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
7832593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
784de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
7852593348eSBarry Smith   PetscFree(a);
78690f02eecSBarry Smith   if (A->mapping) {
78790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
78890f02eecSBarry Smith   }
789f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
790f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
7912593348eSBarry Smith   return 0;
7922593348eSBarry Smith }
7932593348eSBarry Smith 
7945615d1e5SSatish Balay #undef __FUNC__
7955615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ"
796b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
7972593348eSBarry Smith {
798b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7996d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
8006d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
8016d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
802219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
8036d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
8046d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
8056d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
806219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8076d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8086d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
80990f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
81090f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
81194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8126d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
813e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8142593348eSBarry Smith   else
815e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8162593348eSBarry Smith   return 0;
8172593348eSBarry Smith }
8182593348eSBarry Smith 
8192593348eSBarry Smith 
8202593348eSBarry Smith /* -------------------------------------------------------*/
8212593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8222593348eSBarry Smith /* -------------------------------------------------------*/
823b6490206SBarry Smith #include "pinclude/plapack.h"
824b6490206SBarry Smith 
8255615d1e5SSatish Balay #undef __FUNC__
8265615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
82739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8282593348eSBarry Smith {
829b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
83039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
831c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8322593348eSBarry Smith 
833c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
834c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
835b6490206SBarry Smith 
836119a934aSSatish Balay   idx   = a->j;
837119a934aSSatish Balay   v     = a->a;
838119a934aSSatish Balay   ii    = a->i;
839119a934aSSatish Balay 
840119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
841119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
842119a934aSSatish Balay     sum  = 0.0;
843119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8441a6a6d98SBarry Smith     z[i] = sum;
845119a934aSSatish Balay   }
846c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
847c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
84839b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
84939b95ed1SBarry Smith   return 0;
85039b95ed1SBarry Smith }
85139b95ed1SBarry Smith 
8525615d1e5SSatish Balay #undef __FUNC__
8535615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
85439b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
85539b95ed1SBarry Smith {
85639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
85739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
85839b95ed1SBarry Smith   register Scalar x1,x2;
85939b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
860c16cb8f2SBarry Smith   int             j,n;
86139b95ed1SBarry Smith 
862c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
863c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
86439b95ed1SBarry Smith 
86539b95ed1SBarry Smith   idx   = a->j;
86639b95ed1SBarry Smith   v     = a->a;
86739b95ed1SBarry Smith   ii    = a->i;
86839b95ed1SBarry Smith 
869119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
870119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
871119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
872119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
873119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
874119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
875119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
876119a934aSSatish Balay       v += 4;
877119a934aSSatish Balay     }
8781a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
879119a934aSSatish Balay     z += 2;
880119a934aSSatish Balay   }
881c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
882c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
88339b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
88439b95ed1SBarry Smith   return 0;
88539b95ed1SBarry Smith }
88639b95ed1SBarry Smith 
8875615d1e5SSatish Balay #undef __FUNC__
8885615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
88939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
89039b95ed1SBarry Smith {
89139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
89239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
893c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
89439b95ed1SBarry Smith 
895c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
896c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
89739b95ed1SBarry Smith 
89839b95ed1SBarry Smith   idx   = a->j;
89939b95ed1SBarry Smith   v     = a->a;
90039b95ed1SBarry Smith   ii    = a->i;
90139b95ed1SBarry Smith 
902119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
903119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
904119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
905119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
906119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
907119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
908119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
909119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
910119a934aSSatish Balay       v += 9;
911119a934aSSatish Balay     }
9121a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
913119a934aSSatish Balay     z += 3;
914119a934aSSatish Balay   }
915c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
916c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
91739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
91839b95ed1SBarry Smith   return 0;
91939b95ed1SBarry Smith }
92039b95ed1SBarry Smith 
9215615d1e5SSatish Balay #undef __FUNC__
9225615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
92339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
92439b95ed1SBarry Smith {
92539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
92639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
92739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
92839b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
929c16cb8f2SBarry Smith   int             j,n;
93039b95ed1SBarry Smith 
931c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
932c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
93339b95ed1SBarry Smith 
93439b95ed1SBarry Smith   idx   = a->j;
93539b95ed1SBarry Smith   v     = a->a;
93639b95ed1SBarry Smith   ii    = a->i;
93739b95ed1SBarry Smith 
938119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
939119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
940119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
941119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
942119a934aSSatish Balay       xb = x + 4*(*idx++);
943119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
944119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
945119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
946119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
947119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
948119a934aSSatish Balay       v += 16;
949119a934aSSatish Balay     }
9501a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
951119a934aSSatish Balay     z += 4;
952119a934aSSatish Balay   }
953c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
954c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
95539b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
95639b95ed1SBarry Smith   return 0;
95739b95ed1SBarry Smith }
95839b95ed1SBarry Smith 
9595615d1e5SSatish Balay #undef __FUNC__
9605615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
96139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
96239b95ed1SBarry Smith {
96339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
96439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
96539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
966c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
96739b95ed1SBarry Smith 
968c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
969c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
97039b95ed1SBarry Smith 
97139b95ed1SBarry Smith   idx   = a->j;
97239b95ed1SBarry Smith   v     = a->a;
97339b95ed1SBarry Smith   ii    = a->i;
97439b95ed1SBarry Smith 
975119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
976119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
977119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
978119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
979119a934aSSatish Balay       xb = x + 5*(*idx++);
980119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
981119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
982119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
983119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
984119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
985119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
986119a934aSSatish Balay       v += 25;
987119a934aSSatish Balay     }
9881a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
989119a934aSSatish Balay     z += 5;
990119a934aSSatish Balay   }
991c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
992c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
99339b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
99439b95ed1SBarry Smith   return 0;
99539b95ed1SBarry Smith }
99639b95ed1SBarry Smith 
9975615d1e5SSatish Balay #undef __FUNC__
998*48e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
999*48e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
1000*48e9ddb2SSatish Balay {
1001*48e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1002*48e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1003*48e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
1004*48e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
1005*48e9ddb2SSatish Balay 
1006*48e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
1007*48e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
1008*48e9ddb2SSatish Balay 
1009*48e9ddb2SSatish Balay   idx   = a->j;
1010*48e9ddb2SSatish Balay   v     = a->a;
1011*48e9ddb2SSatish Balay   ii    = a->i;
1012*48e9ddb2SSatish Balay 
1013*48e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
1014*48e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
1015*48e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1016*48e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
1017*48e9ddb2SSatish Balay       xb = x + 7*(*idx++);
1018*48e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1019*48e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1020*48e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1021*48e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1022*48e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1023*48e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1024*48e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1025*48e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1026*48e9ddb2SSatish Balay       v += 49;
1027*48e9ddb2SSatish Balay     }
1028*48e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1029*48e9ddb2SSatish Balay     z += 7;
1030*48e9ddb2SSatish Balay   }
1031*48e9ddb2SSatish Balay 
1032*48e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
1033*48e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
1034*48e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
1035*48e9ddb2SSatish Balay   return 0;
1036*48e9ddb2SSatish Balay }
1037*48e9ddb2SSatish Balay 
1038*48e9ddb2SSatish Balay #undef __FUNC__
10395615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
104039b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
104139b95ed1SBarry Smith {
104239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
104339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1044c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1045c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1046c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
104739b95ed1SBarry Smith 
1048c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1049c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
105039b95ed1SBarry Smith 
105139b95ed1SBarry Smith   idx   = a->j;
105239b95ed1SBarry Smith   v     = a->a;
105339b95ed1SBarry Smith   ii    = a->i;
105439b95ed1SBarry Smith 
105539b95ed1SBarry Smith 
1056119a934aSSatish Balay   if (!a->mult_work) {
10573b547af2SSatish Balay     k = PetscMax(a->m,a->n);
10582b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1059119a934aSSatish Balay   }
1060119a934aSSatish Balay   work = a->mult_work;
1061119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1062119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1063119a934aSSatish Balay     ncols = n*bs;
1064119a934aSSatish Balay     workt = work;
1065119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1066119a934aSSatish Balay       xb = x + bs*(*idx++);
1067119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1068119a934aSSatish Balay       workt += bs;
1069119a934aSSatish Balay     }
10701a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1071119a934aSSatish Balay     v += n*bs2;
1072119a934aSSatish Balay     z += bs;
1073119a934aSSatish Balay   }
1074c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1075c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
10761a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1077bb42667fSSatish Balay   return 0;
1078bb42667fSSatish Balay }
1079bb42667fSSatish Balay 
10805615d1e5SSatish Balay #undef __FUNC__
10815615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1082f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1083f44d3a6dSSatish Balay {
1084f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1085f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1086c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1087f44d3a6dSSatish Balay 
1088c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1089c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1090c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1091f44d3a6dSSatish Balay 
1092f44d3a6dSSatish Balay   idx   = a->j;
1093f44d3a6dSSatish Balay   v     = a->a;
1094f44d3a6dSSatish Balay   ii    = a->i;
1095f44d3a6dSSatish Balay 
1096f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1097f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1098f44d3a6dSSatish Balay     sum  = y[i];
1099f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1100f44d3a6dSSatish Balay     z[i] = sum;
1101f44d3a6dSSatish Balay   }
1102c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1103c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1104c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1105f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1106f44d3a6dSSatish Balay   return 0;
1107f44d3a6dSSatish Balay }
1108f44d3a6dSSatish Balay 
11095615d1e5SSatish Balay #undef __FUNC__
11105615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1111f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1112f44d3a6dSSatish Balay {
1113f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1114f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1115f44d3a6dSSatish Balay   register Scalar x1,x2;
1116f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1117c16cb8f2SBarry Smith   int             j,n;
1118f44d3a6dSSatish Balay 
1119c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1120c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1121c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1122f44d3a6dSSatish Balay 
1123f44d3a6dSSatish Balay   idx   = a->j;
1124f44d3a6dSSatish Balay   v     = a->a;
1125f44d3a6dSSatish Balay   ii    = a->i;
1126f44d3a6dSSatish Balay 
1127f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1128f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1129f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1130f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1131f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1132f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1133f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1134f44d3a6dSSatish Balay       v += 4;
1135f44d3a6dSSatish Balay     }
1136f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1137f44d3a6dSSatish Balay     z += 2; y += 2;
1138f44d3a6dSSatish Balay   }
1139c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1140c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1141c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1142f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1143f44d3a6dSSatish Balay   return 0;
1144f44d3a6dSSatish Balay }
1145f44d3a6dSSatish Balay 
11465615d1e5SSatish Balay #undef __FUNC__
11475615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1148f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1149f44d3a6dSSatish Balay {
1150f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1151f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1152c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1153f44d3a6dSSatish Balay 
1154c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1155c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1156c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1157f44d3a6dSSatish Balay 
1158f44d3a6dSSatish Balay   idx   = a->j;
1159f44d3a6dSSatish Balay   v     = a->a;
1160f44d3a6dSSatish Balay   ii    = a->i;
1161f44d3a6dSSatish Balay 
1162f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1163f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1164f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1165f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1166f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1167f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1168f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1169f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1170f44d3a6dSSatish Balay       v += 9;
1171f44d3a6dSSatish Balay     }
1172f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1173f44d3a6dSSatish Balay     z += 3; y += 3;
1174f44d3a6dSSatish Balay   }
1175c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1176c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1177c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1178f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1179f44d3a6dSSatish Balay   return 0;
1180f44d3a6dSSatish Balay }
1181f44d3a6dSSatish Balay 
11825615d1e5SSatish Balay #undef __FUNC__
11835615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1184f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1185f44d3a6dSSatish Balay {
1186f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1187f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1188f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1189f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1190c16cb8f2SBarry Smith   int             j,n;
1191f44d3a6dSSatish Balay 
1192c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1193c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1194c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1195f44d3a6dSSatish Balay 
1196f44d3a6dSSatish Balay   idx   = a->j;
1197f44d3a6dSSatish Balay   v     = a->a;
1198f44d3a6dSSatish Balay   ii    = a->i;
1199f44d3a6dSSatish Balay 
1200f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1201f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1202f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1203f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1204f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1205f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1206f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1207f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1208f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1209f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1210f44d3a6dSSatish Balay       v += 16;
1211f44d3a6dSSatish Balay     }
1212f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1213f44d3a6dSSatish Balay     z += 4; y += 4;
1214f44d3a6dSSatish Balay   }
1215c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1216c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1217c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1218f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1219f44d3a6dSSatish Balay   return 0;
1220f44d3a6dSSatish Balay }
1221f44d3a6dSSatish Balay 
12225615d1e5SSatish Balay #undef __FUNC__
12235615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1224f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1225f44d3a6dSSatish Balay {
1226f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1227f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1228f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1229c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1230f44d3a6dSSatish Balay 
1231c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1232c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1233c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1234f44d3a6dSSatish Balay 
1235f44d3a6dSSatish Balay   idx   = a->j;
1236f44d3a6dSSatish Balay   v     = a->a;
1237f44d3a6dSSatish Balay   ii    = a->i;
1238f44d3a6dSSatish Balay 
1239f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1240f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1241f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1242f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1243f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1244f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1245f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1246f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1247f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1248f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1249f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1250f44d3a6dSSatish Balay       v += 25;
1251f44d3a6dSSatish Balay     }
1252f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1253f44d3a6dSSatish Balay     z += 5; y += 5;
1254f44d3a6dSSatish Balay   }
1255c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1256c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1257c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1258f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1259f44d3a6dSSatish Balay   return 0;
1260f44d3a6dSSatish Balay }
1261f44d3a6dSSatish Balay 
12625615d1e5SSatish Balay #undef __FUNC__
1263*48e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
1264*48e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1265*48e9ddb2SSatish Balay {
1266*48e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1267*48e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1268*48e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
1269*48e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
1270*48e9ddb2SSatish Balay 
1271*48e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
1272*48e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
1273*48e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
1274*48e9ddb2SSatish Balay 
1275*48e9ddb2SSatish Balay   idx   = a->j;
1276*48e9ddb2SSatish Balay   v     = a->a;
1277*48e9ddb2SSatish Balay   ii    = a->i;
1278*48e9ddb2SSatish Balay 
1279*48e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
1280*48e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
1281*48e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1282*48e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
1283*48e9ddb2SSatish Balay       xb = x + 7*(*idx++);
1284*48e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1285*48e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1286*48e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1287*48e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1288*48e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1289*48e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1290*48e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1291*48e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1292*48e9ddb2SSatish Balay       v += 49;
1293*48e9ddb2SSatish Balay     }
1294*48e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1295*48e9ddb2SSatish Balay     z += 7; y += 7;
1296*48e9ddb2SSatish Balay   }
1297*48e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
1298*48e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
1299*48e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
1300*48e9ddb2SSatish Balay   PLogFlops(98*a->nz);
1301*48e9ddb2SSatish Balay   return 0;
1302*48e9ddb2SSatish Balay }
1303*48e9ddb2SSatish Balay 
1304*48e9ddb2SSatish Balay 
1305*48e9ddb2SSatish Balay #undef __FUNC__
13065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1307f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1308f44d3a6dSSatish Balay {
1309f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1310f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1311f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1312f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1313f44d3a6dSSatish Balay 
1314f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1315f44d3a6dSSatish Balay 
1316c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1317c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1318f44d3a6dSSatish Balay 
1319f44d3a6dSSatish Balay   idx   = a->j;
1320f44d3a6dSSatish Balay   v     = a->a;
1321f44d3a6dSSatish Balay   ii    = a->i;
1322f44d3a6dSSatish Balay 
1323f44d3a6dSSatish Balay 
1324f44d3a6dSSatish Balay   if (!a->mult_work) {
1325f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1326f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1327f44d3a6dSSatish Balay   }
1328f44d3a6dSSatish Balay   work = a->mult_work;
1329f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1330f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1331f44d3a6dSSatish Balay     ncols = n*bs;
1332f44d3a6dSSatish Balay     workt = work;
1333f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1334f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1335f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1336f44d3a6dSSatish Balay       workt += bs;
1337f44d3a6dSSatish Balay     }
1338f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1339f44d3a6dSSatish Balay     v += n*bs2;
1340f44d3a6dSSatish Balay     z += bs;
1341f44d3a6dSSatish Balay   }
1342c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1343c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1344f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1345f44d3a6dSSatish Balay   return 0;
1346f44d3a6dSSatish Balay }
1347f44d3a6dSSatish Balay 
13485615d1e5SSatish Balay #undef __FUNC__
13495615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
13501a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1351bb42667fSSatish Balay {
1352bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
13531a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1354bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1355bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
13569df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1357119a934aSSatish Balay 
1358119a934aSSatish Balay 
135990f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
136090f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1361bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1362bb42667fSSatish Balay 
1363119a934aSSatish Balay   idx   = a->j;
1364119a934aSSatish Balay   v     = a->a;
1365119a934aSSatish Balay   ii    = a->i;
1366119a934aSSatish Balay 
1367119a934aSSatish Balay   switch (bs) {
1368119a934aSSatish Balay   case 1:
1369119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1370119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1371119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1372119a934aSSatish Balay       ib = idx + ai[i];
1373119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1374bb1453f0SSatish Balay         rval    = ib[j];
1375bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1376119a934aSSatish Balay       }
1377119a934aSSatish Balay     }
1378119a934aSSatish Balay     break;
1379119a934aSSatish Balay   case 2:
1380119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1381119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1382119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1383119a934aSSatish Balay       ib = idx + ai[i];
1384119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1385119a934aSSatish Balay         rval      = ib[j]*2;
1386bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1387bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1388119a934aSSatish Balay         v += 4;
1389119a934aSSatish Balay       }
1390119a934aSSatish Balay     }
1391119a934aSSatish Balay     break;
1392119a934aSSatish Balay   case 3:
1393119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1394119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1395119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1396119a934aSSatish Balay       ib = idx + ai[i];
1397119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1398119a934aSSatish Balay         rval      = ib[j]*3;
1399bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1400bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1401bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1402119a934aSSatish Balay         v += 9;
1403119a934aSSatish Balay       }
1404119a934aSSatish Balay     }
1405119a934aSSatish Balay     break;
1406119a934aSSatish Balay   case 4:
1407119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1408119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1409119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1410119a934aSSatish Balay       ib = idx + ai[i];
1411119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1412119a934aSSatish Balay         rval      = ib[j]*4;
1413bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1414bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1415bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1416bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1417119a934aSSatish Balay         v += 16;
1418119a934aSSatish Balay       }
1419119a934aSSatish Balay     }
1420119a934aSSatish Balay     break;
1421119a934aSSatish Balay   case 5:
1422119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1423119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1424119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1425119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1426119a934aSSatish Balay       ib = idx + ai[i];
1427119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1428119a934aSSatish Balay         rval      = ib[j]*5;
1429bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1430bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1431bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1432bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1433bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1434119a934aSSatish Balay         v += 25;
1435119a934aSSatish Balay       }
1436119a934aSSatish Balay     }
1437119a934aSSatish Balay     break;
1438119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1439119a934aSSatish Balay     default: {
1440119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1441119a934aSSatish Balay       if (!a->mult_work) {
14423b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1443bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1444119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1445119a934aSSatish Balay       }
1446119a934aSSatish Balay       work = a->mult_work;
1447119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1448119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1449119a934aSSatish Balay         ncols = n*bs;
1450119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1451119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1452119a934aSSatish Balay         v += n*bs2;
1453119a934aSSatish Balay         x += bs;
1454119a934aSSatish Balay         workt = work;
1455119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1456119a934aSSatish Balay           zb = z + bs*(*idx++);
1457bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1458119a934aSSatish Balay           workt += bs;
1459119a934aSSatish Balay         }
1460119a934aSSatish Balay       }
1461119a934aSSatish Balay     }
1462119a934aSSatish Balay   }
14630419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
14640419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1465faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1466faf6580fSSatish Balay   return 0;
1467faf6580fSSatish Balay }
14681c351548SSatish Balay 
14695615d1e5SSatish Balay #undef __FUNC__
14705615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
1471faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1472faf6580fSSatish Balay {
1473faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1474faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1475faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1476faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1477faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1478faf6580fSSatish Balay 
1479faf6580fSSatish Balay 
1480faf6580fSSatish Balay 
148190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
148290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1483faf6580fSSatish Balay 
1484648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1485648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1486648c1d95SSatish Balay 
1487faf6580fSSatish Balay 
1488faf6580fSSatish Balay   idx   = a->j;
1489faf6580fSSatish Balay   v     = a->a;
1490faf6580fSSatish Balay   ii    = a->i;
1491faf6580fSSatish Balay 
1492faf6580fSSatish Balay   switch (bs) {
1493faf6580fSSatish Balay   case 1:
1494faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1495faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1496faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1497faf6580fSSatish Balay       ib = idx + ai[i];
1498faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1499faf6580fSSatish Balay         rval    = ib[j];
1500faf6580fSSatish Balay         z[rval] += *v++ * x1;
1501faf6580fSSatish Balay       }
1502faf6580fSSatish Balay     }
1503faf6580fSSatish Balay     break;
1504faf6580fSSatish Balay   case 2:
1505faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1506faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1507faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1508faf6580fSSatish Balay       ib = idx + ai[i];
1509faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1510faf6580fSSatish Balay         rval      = ib[j]*2;
1511faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1512faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1513faf6580fSSatish Balay         v += 4;
1514faf6580fSSatish Balay       }
1515faf6580fSSatish Balay     }
1516faf6580fSSatish Balay     break;
1517faf6580fSSatish Balay   case 3:
1518faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1519faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1520faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1521faf6580fSSatish Balay       ib = idx + ai[i];
1522faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1523faf6580fSSatish Balay         rval      = ib[j]*3;
1524faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1525faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1526faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1527faf6580fSSatish Balay         v += 9;
1528faf6580fSSatish Balay       }
1529faf6580fSSatish Balay     }
1530faf6580fSSatish Balay     break;
1531faf6580fSSatish Balay   case 4:
1532faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1533faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1534faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1535faf6580fSSatish Balay       ib = idx + ai[i];
1536faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1537faf6580fSSatish Balay         rval      = ib[j]*4;
1538faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1539faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1540faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1541faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1542faf6580fSSatish Balay         v += 16;
1543faf6580fSSatish Balay       }
1544faf6580fSSatish Balay     }
1545faf6580fSSatish Balay     break;
1546faf6580fSSatish Balay   case 5:
1547faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1548faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1549faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1550faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1551faf6580fSSatish Balay       ib = idx + ai[i];
1552faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1553faf6580fSSatish Balay         rval      = ib[j]*5;
1554faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1555faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1556faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1557faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1558faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1559faf6580fSSatish Balay         v += 25;
1560faf6580fSSatish Balay       }
1561faf6580fSSatish Balay     }
1562faf6580fSSatish Balay     break;
1563faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1564faf6580fSSatish Balay     default: {
1565faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1566faf6580fSSatish Balay       if (!a->mult_work) {
1567faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1568faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1569faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1570faf6580fSSatish Balay       }
1571faf6580fSSatish Balay       work = a->mult_work;
1572faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1573faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1574faf6580fSSatish Balay         ncols = n*bs;
1575faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1576faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1577faf6580fSSatish Balay         v += n*bs2;
1578faf6580fSSatish Balay         x += bs;
1579faf6580fSSatish Balay         workt = work;
1580faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1581faf6580fSSatish Balay           zb = z + bs*(*idx++);
1582faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1583faf6580fSSatish Balay           workt += bs;
1584faf6580fSSatish Balay         }
1585faf6580fSSatish Balay       }
1586faf6580fSSatish Balay     }
1587faf6580fSSatish Balay   }
1588faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1589faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1590faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
15912593348eSBarry Smith   return 0;
15922593348eSBarry Smith }
15932593348eSBarry Smith 
15945615d1e5SSatish Balay #undef __FUNC__
15955615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ"
15964e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
15972593348eSBarry Smith {
1598b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15994e220ebcSLois Curfman McInnes 
16004e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16014e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16024e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16034e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16044e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16054e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16064e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16074e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16084e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16094e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16104e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16114e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16124e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16134e220ebcSLois Curfman McInnes   if (A->factor) {
16144e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16154e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16164e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16174e220ebcSLois Curfman McInnes   } else {
16184e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16194e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16204e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16214e220ebcSLois Curfman McInnes   }
16222593348eSBarry Smith   return 0;
16232593348eSBarry Smith }
16242593348eSBarry Smith 
16255615d1e5SSatish Balay #undef __FUNC__
16265615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
162791d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
162891d316f6SSatish Balay {
162991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
163091d316f6SSatish Balay 
1631e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
163291d316f6SSatish Balay 
163391d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
163491d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1635a7c10996SSatish Balay       (a->nz != b->nz)) {
163691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
163791d316f6SSatish Balay   }
163891d316f6SSatish Balay 
163991d316f6SSatish Balay   /* if the a->i are the same */
164091d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
164191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
164291d316f6SSatish Balay   }
164391d316f6SSatish Balay 
164491d316f6SSatish Balay   /* if a->j are the same */
164591d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
164691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
164791d316f6SSatish Balay   }
164891d316f6SSatish Balay 
164991d316f6SSatish Balay   /* if a->a are the same */
165091d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
165191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
165291d316f6SSatish Balay   }
165391d316f6SSatish Balay   *flg = PETSC_TRUE;
165491d316f6SSatish Balay   return 0;
165591d316f6SSatish Balay 
165691d316f6SSatish Balay }
165791d316f6SSatish Balay 
16585615d1e5SSatish Balay #undef __FUNC__
16595615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
166091d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
166191d316f6SSatish Balay {
166291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16637e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
166417e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
166517e48fc4SSatish Balay 
16660513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
166717e48fc4SSatish Balay   bs   = a->bs;
166817e48fc4SSatish Balay   aa   = a->a;
166917e48fc4SSatish Balay   ai   = a->i;
167017e48fc4SSatish Balay   aj   = a->j;
167117e48fc4SSatish Balay   ambs = a->mbs;
16729df24120SSatish Balay   bs2  = a->bs2;
167391d316f6SSatish Balay 
167491d316f6SSatish Balay   VecSet(&zero,v);
167590f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1676e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
167717e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
167817e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
167917e48fc4SSatish Balay       if (aj[j] == i) {
168017e48fc4SSatish Balay         row  = i*bs;
16817e67e3f9SSatish Balay         aa_j = aa+j*bs2;
16827e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
168391d316f6SSatish Balay         break;
168491d316f6SSatish Balay       }
168591d316f6SSatish Balay     }
168691d316f6SSatish Balay   }
168791d316f6SSatish Balay   return 0;
168891d316f6SSatish Balay }
168991d316f6SSatish Balay 
16905615d1e5SSatish Balay #undef __FUNC__
16915615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
16929867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
16939867e207SSatish Balay {
16949867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16959867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
16967e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
16979867e207SSatish Balay 
16989867e207SSatish Balay   ai  = a->i;
16999867e207SSatish Balay   aj  = a->j;
17009867e207SSatish Balay   aa  = a->a;
17019867e207SSatish Balay   m   = a->m;
17029867e207SSatish Balay   n   = a->n;
17039867e207SSatish Balay   bs  = a->bs;
17049867e207SSatish Balay   mbs = a->mbs;
17059df24120SSatish Balay   bs2 = a->bs2;
17069867e207SSatish Balay   if (ll) {
170790f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1708e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17099867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17109867e207SSatish Balay       M  = ai[i+1] - ai[i];
17119867e207SSatish Balay       li = l + i*bs;
17127e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17139867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17147e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17159867e207SSatish Balay           (*v++) *= li[k%bs];
17169867e207SSatish Balay         }
17179867e207SSatish Balay       }
17189867e207SSatish Balay     }
17199867e207SSatish Balay   }
17209867e207SSatish Balay 
17219867e207SSatish Balay   if (rr) {
172290f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1723e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17249867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17259867e207SSatish Balay       M  = ai[i+1] - ai[i];
17267e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17279867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17289867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17299867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17309867e207SSatish Balay           x = ri[k];
17319867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17329867e207SSatish Balay         }
17339867e207SSatish Balay       }
17349867e207SSatish Balay     }
17359867e207SSatish Balay   }
17369867e207SSatish Balay   return 0;
17379867e207SSatish Balay }
17389867e207SSatish Balay 
17399867e207SSatish Balay 
1740b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1741b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17422a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1743736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1744736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17451a6a6d98SBarry Smith 
17461a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17471a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
17481a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
17491a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
17501a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
17511a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1752*48e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
17531a6a6d98SBarry Smith 
17547fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
17557fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
17567fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
17577fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
17587fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
17597fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
17602593348eSBarry Smith 
17615615d1e5SSatish Balay #undef __FUNC__
17625615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
1763b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
17642593348eSBarry Smith {
1765b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17662593348eSBarry Smith   Scalar      *v = a->a;
17672593348eSBarry Smith   double      sum = 0.0;
17689df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
17692593348eSBarry Smith 
17702593348eSBarry Smith   if (type == NORM_FROBENIUS) {
17710419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
17722593348eSBarry Smith #if defined(PETSC_COMPLEX)
17732593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
17742593348eSBarry Smith #else
17752593348eSBarry Smith       sum += (*v)*(*v); v++;
17762593348eSBarry Smith #endif
17772593348eSBarry Smith     }
17782593348eSBarry Smith     *norm = sqrt(sum);
17792593348eSBarry Smith   }
17802593348eSBarry Smith   else {
1781e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
17822593348eSBarry Smith   }
17832593348eSBarry Smith   return 0;
17842593348eSBarry Smith }
17852593348eSBarry Smith 
17862593348eSBarry Smith /*
17872593348eSBarry Smith      note: This can only work for identity for row and col. It would
17882593348eSBarry Smith    be good to check this and otherwise generate an error.
17892593348eSBarry Smith */
17905615d1e5SSatish Balay #undef __FUNC__
17915615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
1792b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
17932593348eSBarry Smith {
1794b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
17952593348eSBarry Smith   Mat         outA;
1796de6a44a3SBarry Smith   int         ierr;
17972593348eSBarry Smith 
1798e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
17992593348eSBarry Smith 
18002593348eSBarry Smith   outA          = inA;
18012593348eSBarry Smith   inA->factor   = FACTOR_LU;
18022593348eSBarry Smith   a->row        = row;
18032593348eSBarry Smith   a->col        = col;
18042593348eSBarry Smith 
1805de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
18062593348eSBarry Smith 
18072593348eSBarry Smith   if (!a->diag) {
1808b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18092593348eSBarry Smith   }
18107fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18112593348eSBarry Smith   return 0;
18122593348eSBarry Smith }
18132593348eSBarry Smith 
18145615d1e5SSatish Balay #undef __FUNC__
18155615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
1816b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18172593348eSBarry Smith {
1818b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18199df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1820b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1821b6490206SBarry Smith   PLogFlops(totalnz);
18222593348eSBarry Smith   return 0;
18232593348eSBarry Smith }
18242593348eSBarry Smith 
18255615d1e5SSatish Balay #undef __FUNC__
18265615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18277e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18287e67e3f9SSatish Balay {
18297e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18307e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1831a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18329df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18337e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18347e67e3f9SSatish Balay 
18357e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
18367e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1837e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1838e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1839a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
18407e67e3f9SSatish Balay     nrow = ailen[brow];
18417e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1842e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1843e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1844a7c10996SSatish Balay       col  = in[l] ;
18457e67e3f9SSatish Balay       bcol = col/bs;
18467e67e3f9SSatish Balay       cidx = col%bs;
18477e67e3f9SSatish Balay       ridx = row%bs;
18487e67e3f9SSatish Balay       high = nrow;
18497e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
18507e67e3f9SSatish Balay       while (high-low > 5) {
18517e67e3f9SSatish Balay         t = (low+high)/2;
18527e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
18537e67e3f9SSatish Balay         else             low  = t;
18547e67e3f9SSatish Balay       }
18557e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
18567e67e3f9SSatish Balay         if (rp[i] > bcol) break;
18577e67e3f9SSatish Balay         if (rp[i] == bcol) {
18587e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18597e67e3f9SSatish Balay           goto finished;
18607e67e3f9SSatish Balay         }
18617e67e3f9SSatish Balay       }
18627e67e3f9SSatish Balay       *v++ = zero;
18637e67e3f9SSatish Balay       finished:;
18647e67e3f9SSatish Balay     }
18657e67e3f9SSatish Balay   }
18667e67e3f9SSatish Balay   return 0;
18677e67e3f9SSatish Balay }
18687e67e3f9SSatish Balay 
18695615d1e5SSatish Balay #undef __FUNC__
18705615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
18715a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
18725a838052SSatish Balay {
18735a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
18745a838052SSatish Balay   *bs = baij->bs;
18755a838052SSatish Balay   return 0;
18765a838052SSatish Balay }
18775a838052SSatish Balay 
1878d9b7c43dSSatish Balay /* idx should be of length atlease bs */
18795615d1e5SSatish Balay #undef __FUNC__
18805615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1881d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1882d9b7c43dSSatish Balay {
1883d9b7c43dSSatish Balay   int i,row;
1884d9b7c43dSSatish Balay   row = idx[0];
1885d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1886d9b7c43dSSatish Balay 
1887d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1888d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1889d9b7c43dSSatish Balay   }
1890d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1891d9b7c43dSSatish Balay   return 0;
1892d9b7c43dSSatish Balay }
1893d9b7c43dSSatish Balay 
18945615d1e5SSatish Balay #undef __FUNC__
18955615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
1896d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1897d9b7c43dSSatish Balay {
1898d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1899d9b7c43dSSatish Balay   IS          is_local;
1900d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1901d9b7c43dSSatish Balay   PetscTruth  flg;
1902d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1903d9b7c43dSSatish Balay 
1904d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1905d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1906d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1907537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1908d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1909d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1910d9b7c43dSSatish Balay 
1911d9b7c43dSSatish Balay   i = 0;
1912d9b7c43dSSatish Balay   while (i < is_n) {
1913e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1914d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1915d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1916d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1917d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1918d9b7c43dSSatish Balay       i += bs;
1919d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1920d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1921d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1922d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1923d9b7c43dSSatish Balay         aa[0] = zero;
1924d9b7c43dSSatish Balay         aa+=bs;
1925d9b7c43dSSatish Balay       }
1926d9b7c43dSSatish Balay       i++;
1927d9b7c43dSSatish Balay     }
1928d9b7c43dSSatish Balay   }
1929d9b7c43dSSatish Balay   if (diag) {
1930d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1931d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1932d9b7c43dSSatish Balay     }
1933d9b7c43dSSatish Balay   }
1934d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1935d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1936d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
19379a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1938d9b7c43dSSatish Balay 
1939d9b7c43dSSatish Balay   return 0;
1940d9b7c43dSSatish Balay }
19411c351548SSatish Balay 
19425615d1e5SSatish Balay #undef __FUNC__
19435615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1944ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1945ba4ca20aSSatish Balay {
1946ba4ca20aSSatish Balay   static int called = 0;
1947ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1948ba4ca20aSSatish Balay 
1949ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1950ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1951ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1952ba4ca20aSSatish Balay   return 0;
1953ba4ca20aSSatish Balay }
1954d9b7c43dSSatish Balay 
19552593348eSBarry Smith /* -------------------------------------------------------------------*/
1956cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
19579867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1958f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1959faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
19601a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1961b6490206SBarry Smith        0,0,
1962de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1963b6490206SBarry Smith        0,
1964f2501298SSatish Balay        MatTranspose_SeqBAIJ,
196517e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
19669867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1967584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1968b6490206SBarry Smith        0,
1969d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
19707fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1971b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1972de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1973d402145bSBarry Smith        0,0,
1974b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1975b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1976af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
19777e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1978ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
19793b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
19803b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
198192c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
198292c4ed94SBarry Smith        0,0,0,0,0,0,
198392c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
19842593348eSBarry Smith 
19855615d1e5SSatish Balay #undef __FUNC__
19865615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
19872593348eSBarry Smith /*@C
198844cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
198944cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
199044cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
19912bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
19922bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
19932593348eSBarry Smith 
19942593348eSBarry Smith    Input Parameters:
19952593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1996b6490206SBarry Smith .  bs - size of block
19972593348eSBarry Smith .  m - number of rows
19982593348eSBarry Smith .  n - number of columns
1999b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20002bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20012bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20022593348eSBarry Smith 
20032593348eSBarry Smith    Output Parameter:
20042593348eSBarry Smith .  A - the matrix
20052593348eSBarry Smith 
20060513a670SBarry Smith    Options Database Keys:
20070513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20080513a670SBarry Smith $                     block calculations (much solver)
20090513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20100513a670SBarry Smith 
20112593348eSBarry Smith    Notes:
201244cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20132593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
201444cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20152593348eSBarry Smith 
20162593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20172593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20182593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20196da5968aSLois Curfman McInnes    matrices.
20202593348eSBarry Smith 
202144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20222593348eSBarry Smith @*/
2023b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20242593348eSBarry Smith {
20252593348eSBarry Smith   Mat         B;
2026b6490206SBarry Smith   Mat_SeqBAIJ *b;
20273b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20283b2fbd54SBarry Smith 
20293b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2030e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2031b6490206SBarry Smith 
20320513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20330513a670SBarry Smith 
2034f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2035e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
20362593348eSBarry Smith 
20372593348eSBarry Smith   *A = 0;
2038b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
20392593348eSBarry Smith   PLogObjectCreate(B);
2040b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
204144cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
20422593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
20431a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
20441a6a6d98SBarry Smith   if (!flg) {
20457fc0212eSBarry Smith     switch (bs) {
20467fc0212eSBarry Smith     case 1:
20477fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20481a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
204939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2050f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
20517fc0212eSBarry Smith       break;
20524eeb42bcSBarry Smith     case 2:
20534eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
20541a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
205539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2056f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
20576d84be18SBarry Smith       break;
2058f327dd97SBarry Smith     case 3:
2059f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
20601a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
206139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2062f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
20634eeb42bcSBarry Smith       break;
20646d84be18SBarry Smith     case 4:
20656d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
20661a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
206739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2068f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
20696d84be18SBarry Smith       break;
20706d84be18SBarry Smith     case 5:
20716d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
20721a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
207339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2074f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
20756d84be18SBarry Smith       break;
2076*48e9ddb2SSatish Balay     case 7:
2077*48e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
2078*48e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
2079*48e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
2080*48e9ddb2SSatish Balay       break;
20817fc0212eSBarry Smith     }
20821a6a6d98SBarry Smith   }
2083b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2084b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
20852593348eSBarry Smith   B->factor           = 0;
20862593348eSBarry Smith   B->lupivotthreshold = 1.0;
208790f02eecSBarry Smith   B->mapping          = 0;
20882593348eSBarry Smith   b->row              = 0;
20892593348eSBarry Smith   b->col              = 0;
20902593348eSBarry Smith   b->reallocs         = 0;
20912593348eSBarry Smith 
209244cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
209344cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2094b6490206SBarry Smith   b->mbs     = mbs;
2095f2501298SSatish Balay   b->nbs     = nbs;
2096b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
20972593348eSBarry Smith   if (nnz == PETSC_NULL) {
2098119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
20992593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2100b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2101b6490206SBarry Smith     nz = nz*mbs;
21022593348eSBarry Smith   }
21032593348eSBarry Smith   else {
21042593348eSBarry Smith     nz = 0;
2105b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21062593348eSBarry Smith   }
21072593348eSBarry Smith 
21082593348eSBarry Smith   /* allocate the matrix space */
21097e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21102593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21117e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21127e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21132593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21142593348eSBarry Smith   b->i  = b->j + nz;
21152593348eSBarry Smith   b->singlemalloc = 1;
21162593348eSBarry Smith 
2117de6a44a3SBarry Smith   b->i[0] = 0;
2118b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21192593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21202593348eSBarry Smith   }
21212593348eSBarry Smith 
2122b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2123b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2124b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
2125b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21262593348eSBarry Smith 
2127b6490206SBarry Smith   b->bs               = bs;
21289df24120SSatish Balay   b->bs2              = bs2;
2129b6490206SBarry Smith   b->mbs              = mbs;
21302593348eSBarry Smith   b->nz               = 0;
213118e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21322593348eSBarry Smith   b->sorted           = 0;
21332593348eSBarry Smith   b->roworiented      = 1;
21342593348eSBarry Smith   b->nonew            = 0;
21352593348eSBarry Smith   b->diag             = 0;
21362593348eSBarry Smith   b->solve_work       = 0;
2137de6a44a3SBarry Smith   b->mult_work        = 0;
21382593348eSBarry Smith   b->spptr            = 0;
21394e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
21404e220ebcSLois Curfman McInnes 
21412593348eSBarry Smith   *A = B;
21422593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
21432593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
21442593348eSBarry Smith   return 0;
21452593348eSBarry Smith }
21462593348eSBarry Smith 
21475615d1e5SSatish Balay #undef __FUNC__
21485615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2149b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
21502593348eSBarry Smith {
21512593348eSBarry Smith   Mat         C;
2152b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
21539df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2154de6a44a3SBarry Smith 
2155e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
21562593348eSBarry Smith 
21572593348eSBarry Smith   *B = 0;
2158b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
21592593348eSBarry Smith   PLogObjectCreate(C);
2160b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
21612593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2162b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2163b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
21642593348eSBarry Smith   C->factor     = A->factor;
21652593348eSBarry Smith   c->row        = 0;
21662593348eSBarry Smith   c->col        = 0;
21672593348eSBarry Smith   C->assembled  = PETSC_TRUE;
21682593348eSBarry Smith 
216944cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
217044cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
217144cd7ae7SLois Curfman McInnes   C->M          = a->m;
217244cd7ae7SLois Curfman McInnes   C->N          = a->n;
217344cd7ae7SLois Curfman McInnes 
2174b6490206SBarry Smith   c->bs         = a->bs;
21759df24120SSatish Balay   c->bs2        = a->bs2;
2176b6490206SBarry Smith   c->mbs        = a->mbs;
2177b6490206SBarry Smith   c->nbs        = a->nbs;
21782593348eSBarry Smith 
2179b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2180b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2181b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
21822593348eSBarry Smith     c->imax[i] = a->imax[i];
21832593348eSBarry Smith     c->ilen[i] = a->ilen[i];
21842593348eSBarry Smith   }
21852593348eSBarry Smith 
21862593348eSBarry Smith   /* allocate the matrix space */
21872593348eSBarry Smith   c->singlemalloc = 1;
21887e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
21892593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
21907e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2191de6a44a3SBarry Smith   c->i  = c->j + nz;
2192b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2193b6490206SBarry Smith   if (mbs > 0) {
2194de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
21952593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
21967e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
21972593348eSBarry Smith     }
21982593348eSBarry Smith   }
21992593348eSBarry Smith 
2200b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
22012593348eSBarry Smith   c->sorted      = a->sorted;
22022593348eSBarry Smith   c->roworiented = a->roworiented;
22032593348eSBarry Smith   c->nonew       = a->nonew;
22042593348eSBarry Smith 
22052593348eSBarry Smith   if (a->diag) {
2206b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2207b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2208b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22092593348eSBarry Smith       c->diag[i] = a->diag[i];
22102593348eSBarry Smith     }
22112593348eSBarry Smith   }
22122593348eSBarry Smith   else c->diag          = 0;
22132593348eSBarry Smith   c->nz                 = a->nz;
22142593348eSBarry Smith   c->maxnz              = a->maxnz;
22152593348eSBarry Smith   c->solve_work         = 0;
22162593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22177fc0212eSBarry Smith   c->mult_work          = 0;
22182593348eSBarry Smith   *B = C;
22192593348eSBarry Smith   return 0;
22202593348eSBarry Smith }
22212593348eSBarry Smith 
22225615d1e5SSatish Balay #undef __FUNC__
22235615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
222419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22252593348eSBarry Smith {
2226b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22272593348eSBarry Smith   Mat          B;
2228de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2229b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
223035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2231a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2232b6490206SBarry Smith   Scalar       *aa;
223319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22342593348eSBarry Smith 
22351a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2236de6a44a3SBarry Smith   bs2  = bs*bs;
2237b6490206SBarry Smith 
22382593348eSBarry Smith   MPI_Comm_size(comm,&size);
2239e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
224090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
224177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
2242e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
22432593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
22442593348eSBarry Smith 
2245e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
224635aab85fSBarry Smith 
224735aab85fSBarry Smith   /*
224835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
224935aab85fSBarry Smith     divisible by the blocksize
225035aab85fSBarry Smith   */
2251b6490206SBarry Smith   mbs        = M/bs;
225235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
225335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
225435aab85fSBarry Smith   else                  mbs++;
225535aab85fSBarry Smith   if (extra_rows) {
2256537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
225735aab85fSBarry Smith   }
2258b6490206SBarry Smith 
22592593348eSBarry Smith   /* read in row lengths */
226035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
226177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
226235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
22632593348eSBarry Smith 
2264b6490206SBarry Smith   /* read in column indices */
226535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
226677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
226735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2268b6490206SBarry Smith 
2269b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2270b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2271b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
227235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
227335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
227435aab85fSBarry Smith   masked = mask + mbs;
2275b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2276b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
227735aab85fSBarry Smith     nmask = 0;
2278b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2279b6490206SBarry Smith       kmax = rowlengths[rowcount];
2280b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
228135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
228235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2283b6490206SBarry Smith       }
2284b6490206SBarry Smith       rowcount++;
2285b6490206SBarry Smith     }
228635aab85fSBarry Smith     browlengths[i] += nmask;
228735aab85fSBarry Smith     /* zero out the mask elements we set */
228835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2289b6490206SBarry Smith   }
2290b6490206SBarry Smith 
22912593348eSBarry Smith   /* create our matrix */
229235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
229335aab85fSBarry Smith          CHKERRQ(ierr);
22942593348eSBarry Smith   B = *A;
2295b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
22962593348eSBarry Smith 
22972593348eSBarry Smith   /* set matrix "i" values */
2298de6a44a3SBarry Smith   a->i[0] = 0;
2299b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2300b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2301b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23022593348eSBarry Smith   }
2303b6490206SBarry Smith   a->nz         = 0;
2304b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23052593348eSBarry Smith 
2306b6490206SBarry Smith   /* read in nonzero values */
230735aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
230877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
230935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2310b6490206SBarry Smith 
2311b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2312b6490206SBarry Smith   nzcount = 0; jcount = 0;
2313b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2314b6490206SBarry Smith     nzcountb = nzcount;
231535aab85fSBarry Smith     nmask    = 0;
2316b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2317b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2318b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
231935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
232035aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2321b6490206SBarry Smith       }
2322b6490206SBarry Smith       rowcount++;
2323b6490206SBarry Smith     }
2324de6a44a3SBarry Smith     /* sort the masked values */
232577c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2326de6a44a3SBarry Smith 
2327b6490206SBarry Smith     /* set "j" values into matrix */
2328b6490206SBarry Smith     maskcount = 1;
232935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
233035aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2331de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2332b6490206SBarry Smith     }
2333b6490206SBarry Smith     /* set "a" values into matrix */
2334de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2335b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2336b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2337b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2338de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2339de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2340de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2341de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2342b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2343b6490206SBarry Smith       }
2344b6490206SBarry Smith     }
234535aab85fSBarry Smith     /* zero out the mask elements we set */
234635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2347b6490206SBarry Smith   }
2348e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2349b6490206SBarry Smith 
2350b6490206SBarry Smith   PetscFree(rowlengths);
2351b6490206SBarry Smith   PetscFree(browlengths);
2352b6490206SBarry Smith   PetscFree(aa);
2353b6490206SBarry Smith   PetscFree(jj);
2354b6490206SBarry Smith   PetscFree(mask);
2355b6490206SBarry Smith 
2356b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2357de6a44a3SBarry Smith 
2358de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2359de6a44a3SBarry Smith   if (flg) {
236019bcc07fSBarry Smith     Viewer tviewer;
236119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2362639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
236319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
236419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2365de6a44a3SBarry Smith   }
2366de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2367de6a44a3SBarry Smith   if (flg) {
236819bcc07fSBarry Smith     Viewer tviewer;
236919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2370639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
237119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
237219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2373de6a44a3SBarry Smith   }
2374de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2375de6a44a3SBarry Smith   if (flg) {
237619bcc07fSBarry Smith     Viewer tviewer;
237719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
237819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
237919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2380de6a44a3SBarry Smith   }
2381de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2382de6a44a3SBarry Smith   if (flg) {
238319bcc07fSBarry Smith     Viewer tviewer;
238419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2385639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
238619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
238719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2388de6a44a3SBarry Smith   }
2389de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2390de6a44a3SBarry Smith   if (flg) {
239119bcc07fSBarry Smith     Viewer tviewer;
239219bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
239319bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
239419bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
239519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2396de6a44a3SBarry Smith   }
23972593348eSBarry Smith   return 0;
23982593348eSBarry Smith }
23992593348eSBarry Smith 
24002593348eSBarry Smith 
24012593348eSBarry Smith 
2402