xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 9c01be13328523e4a6056820612a64024a96658c)
1*9c01be13SBarry Smith 
2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
3*9c01be13SBarry Smith static char vcid[] = "$Id: baij.c,v 1.112 1997/09/26 02:19:29 bsmith Exp bsmith $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
103369ce9aSBarry Smith 
113369ce9aSBarry Smith #include "pinclude/pviewer.h"
123369ce9aSBarry Smith #include "sys.h"
1370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
141a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
151a6a6d98SBarry Smith #include "src/inline/spops.h"
162593348eSBarry Smith #include "petsc.h"
173b547af2SSatish Balay 
182593348eSBarry Smith 
19de6a44a3SBarry Smith /*
20de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
21de6a44a3SBarry Smith */
22de6a44a3SBarry Smith 
235615d1e5SSatish Balay #undef __FUNC__
245615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
25de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
26de6a44a3SBarry Smith {
27de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
287fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
29de6a44a3SBarry Smith 
30de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
31de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
327fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
33de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
34de6a44a3SBarry Smith       if (a->j[j] == i) {
35de6a44a3SBarry Smith         diag[i] = j;
36de6a44a3SBarry Smith         break;
37de6a44a3SBarry Smith       }
38de6a44a3SBarry Smith     }
39de6a44a3SBarry Smith   }
40de6a44a3SBarry Smith   a->diag = diag;
41de6a44a3SBarry Smith   return 0;
42de6a44a3SBarry Smith }
432593348eSBarry Smith 
442593348eSBarry Smith 
453b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
463b2fbd54SBarry Smith 
475615d1e5SSatish Balay #undef __FUNC__
485615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
493b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
503b2fbd54SBarry Smith                             PetscTruth *done)
513b2fbd54SBarry Smith {
523b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
533b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
543b2fbd54SBarry Smith 
553b2fbd54SBarry Smith   *nn = n;
563b2fbd54SBarry Smith   if (!ia) return 0;
573b2fbd54SBarry Smith   if (symmetric) {
583b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
593b2fbd54SBarry Smith   } else if (oshift == 1) {
603b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
613b2fbd54SBarry Smith     int nz = a->i[n] + 1;
623b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
633b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
643b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
653b2fbd54SBarry Smith   } else {
663b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
673b2fbd54SBarry Smith   }
683b2fbd54SBarry Smith 
693b2fbd54SBarry Smith   return 0;
703b2fbd54SBarry Smith }
713b2fbd54SBarry Smith 
725615d1e5SSatish Balay #undef __FUNC__
73d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
743b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
753b2fbd54SBarry Smith                                 PetscTruth *done)
763b2fbd54SBarry Smith {
773b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
783b2fbd54SBarry Smith   int         i,n = a->mbs;
793b2fbd54SBarry Smith 
803b2fbd54SBarry Smith   if (!ia) return 0;
813b2fbd54SBarry Smith   if (symmetric) {
823b2fbd54SBarry Smith     PetscFree(*ia);
833b2fbd54SBarry Smith     PetscFree(*ja);
84af5da2bfSBarry Smith   } else if (oshift == 1) {
853b2fbd54SBarry Smith     int nz = a->i[n];
863b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
873b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
883b2fbd54SBarry Smith   }
893b2fbd54SBarry Smith   return 0;
903b2fbd54SBarry Smith }
913b2fbd54SBarry Smith 
923b2fbd54SBarry Smith 
935615d1e5SSatish Balay #undef __FUNC__
94d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary"
95b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
962593348eSBarry Smith {
97b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
989df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
99b6490206SBarry Smith   Scalar      *aa;
1002593348eSBarry Smith 
10190ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1022593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1032593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1043b2fbd54SBarry Smith 
1052593348eSBarry Smith   col_lens[1] = a->m;
1062593348eSBarry Smith   col_lens[2] = a->n;
1077e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1082593348eSBarry Smith 
1092593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
110b6490206SBarry Smith   count = 0;
111b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
112b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
113b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
114b6490206SBarry Smith     }
1152593348eSBarry Smith   }
1160752156aSBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
1172593348eSBarry Smith   PetscFree(col_lens);
1182593348eSBarry Smith 
1192593348eSBarry Smith   /* store column indices (zero start index) */
12066963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
121b6490206SBarry Smith   count = 0;
122b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
123b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
124b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
125b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
126b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1272593348eSBarry Smith         }
1282593348eSBarry Smith       }
129b6490206SBarry Smith     }
130b6490206SBarry Smith   }
1310752156aSBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
132b6490206SBarry Smith   PetscFree(jj);
1332593348eSBarry Smith 
1342593348eSBarry Smith   /* store nonzero values */
13566963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
136b6490206SBarry Smith   count = 0;
137b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
138b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
139b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
140b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1417e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
142b6490206SBarry Smith         }
143b6490206SBarry Smith       }
144b6490206SBarry Smith     }
145b6490206SBarry Smith   }
1460752156aSBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
147b6490206SBarry Smith   PetscFree(aa);
1482593348eSBarry Smith   return 0;
1492593348eSBarry Smith }
1502593348eSBarry Smith 
1515615d1e5SSatish Balay #undef __FUNC__
152d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII"
153b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1542593348eSBarry Smith {
155b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1569df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1572593348eSBarry Smith   FILE        *fd;
1582593348eSBarry Smith   char        *outputname;
1592593348eSBarry Smith 
16090ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1612593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16290ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
163639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1647ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1652593348eSBarry Smith   }
166639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
167e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1682593348eSBarry Smith   }
169639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
17044cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17144cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17244cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17344cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17444cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17544cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
176766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17744cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17844cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
179766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
180766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
181766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
18244cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18344cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18444cd7ae7SLois Curfman McInnes #else
18544cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18744cd7ae7SLois Curfman McInnes #endif
18844cd7ae7SLois Curfman McInnes           }
18944cd7ae7SLois Curfman McInnes         }
19044cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19144cd7ae7SLois Curfman McInnes       }
19244cd7ae7SLois Curfman McInnes     }
19344cd7ae7SLois Curfman McInnes   }
1942593348eSBarry Smith   else {
195b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
196b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
197b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
198b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
199b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
20088685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
201766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
20288685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2037e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20488685aaeSLois Curfman McInnes           }
205766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
206766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
207766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
208766eeae4SLois Curfman McInnes           }
20988685aaeSLois Curfman McInnes           else {
2107e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
21188685aaeSLois Curfman McInnes           }
21288685aaeSLois Curfman McInnes #else
2137e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
21488685aaeSLois Curfman McInnes #endif
2152593348eSBarry Smith           }
2162593348eSBarry Smith         }
2172593348eSBarry Smith         fprintf(fd,"\n");
2182593348eSBarry Smith       }
2192593348eSBarry Smith     }
220b6490206SBarry Smith   }
2212593348eSBarry Smith   fflush(fd);
2222593348eSBarry Smith   return 0;
2232593348eSBarry Smith }
2242593348eSBarry Smith 
2255615d1e5SSatish Balay #undef __FUNC__
226d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw"
2273270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2283270192aSSatish Balay {
2293270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2303270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2313270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2323270192aSSatish Balay   Scalar       *aa;
2333270192aSSatish Balay   Draw         draw;
2343270192aSSatish Balay   DrawButton   button;
2353270192aSSatish Balay   PetscTruth   isnull;
2363270192aSSatish Balay 
2373270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2383270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2393270192aSSatish Balay 
2403270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2413270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2423270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2433270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2443270192aSSatish Balay   color = DRAW_BLUE;
2453270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2463270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2473270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2483270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2493270192aSSatish Balay       aa = a->a + j*bs2;
2503270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2513270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2523270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
253b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2543270192aSSatish Balay         }
2553270192aSSatish Balay       }
2563270192aSSatish Balay     }
2573270192aSSatish Balay   }
2583270192aSSatish Balay   color = DRAW_CYAN;
2593270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2603270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2613270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2623270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2633270192aSSatish Balay       aa = a->a + j*bs2;
2643270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2653270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2663270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
267b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2683270192aSSatish Balay         }
2693270192aSSatish Balay       }
2703270192aSSatish Balay     }
2713270192aSSatish Balay   }
2723270192aSSatish Balay 
2733270192aSSatish Balay   color = DRAW_RED;
2743270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2753270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2763270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2773270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2783270192aSSatish Balay       aa = a->a + j*bs2;
2793270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2803270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2813270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
282b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2833270192aSSatish Balay         }
2843270192aSSatish Balay       }
2853270192aSSatish Balay     }
2863270192aSSatish Balay   }
2873270192aSSatish Balay 
2883270192aSSatish Balay   DrawFlush(draw);
2893270192aSSatish Balay   DrawGetPause(draw,&pause);
2903270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2913270192aSSatish Balay 
2923270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2933270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2943270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2953270192aSSatish Balay     DrawClear(draw);
2963270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2973270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2983270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2993270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
3003270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
3013270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
3023270192aSSatish Balay     w *= scale; h *= scale;
3033270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
3043270192aSSatish Balay     color = DRAW_BLUE;
3053270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3063270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3073270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3083270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3093270192aSSatish Balay         aa = a->a + j*bs2;
3103270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3113270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3123270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
313b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3143270192aSSatish Balay           }
3153270192aSSatish Balay         }
3163270192aSSatish Balay       }
3173270192aSSatish Balay     }
3183270192aSSatish Balay     color = DRAW_CYAN;
3193270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3203270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3213270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3223270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3233270192aSSatish Balay         aa = a->a + j*bs2;
3243270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3253270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3263270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
327b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3283270192aSSatish Balay           }
3293270192aSSatish Balay         }
3303270192aSSatish Balay       }
3313270192aSSatish Balay     }
3323270192aSSatish Balay 
3333270192aSSatish Balay     color = DRAW_RED;
3343270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3353270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3363270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3373270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3383270192aSSatish Balay         aa = a->a + j*bs2;
3393270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3403270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3413270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
342b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3433270192aSSatish Balay           }
3443270192aSSatish Balay         }
3453270192aSSatish Balay       }
3463270192aSSatish Balay     }
3473270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3483270192aSSatish Balay   }
3493270192aSSatish Balay   return 0;
3503270192aSSatish Balay }
3513270192aSSatish Balay 
3525615d1e5SSatish Balay #undef __FUNC__
353d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ"
3548f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3552593348eSBarry Smith {
3562593348eSBarry Smith   Mat         A = (Mat) obj;
35719bcc07fSBarry Smith   ViewerType  vtype;
35819bcc07fSBarry Smith   int         ierr;
3592593348eSBarry Smith 
36019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36119bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
362e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3632593348eSBarry Smith   }
36419bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
365b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3662593348eSBarry Smith   }
36719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
368b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3692593348eSBarry Smith   }
37019bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3713270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3722593348eSBarry Smith   }
3732593348eSBarry Smith   return 0;
3742593348eSBarry Smith }
375b6490206SBarry Smith 
376119a934aSSatish Balay #define CHUNKSIZE  10
377cd0e1443SSatish Balay 
3785615d1e5SSatish Balay #undef __FUNC__
3795615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
380639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
381cd0e1443SSatish Balay {
382cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
383cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
384cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
385a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3869df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
387cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
388cd0e1443SSatish Balay 
389cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
390cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3913b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
392e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
393e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3943b2fbd54SBarry Smith #endif
3952c3acbe9SBarry Smith     rp   = aj + ai[brow];
3962c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3972c3acbe9SBarry Smith     rmax = imax[brow];
3982c3acbe9SBarry Smith     nrow = ailen[brow];
399cd0e1443SSatish Balay     low  = 0;
400cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
4013b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
402e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
403e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
4043b2fbd54SBarry Smith #endif
405a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
406cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
407cd0e1443SSatish Balay       if (roworiented) {
408cd0e1443SSatish Balay         value = *v++;
409cd0e1443SSatish Balay       }
410cd0e1443SSatish Balay       else {
411cd0e1443SSatish Balay         value = v[k + l*m];
412cd0e1443SSatish Balay       }
413cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4142c3acbe9SBarry Smith       while (high-low > 7) {
415cd0e1443SSatish Balay         t = (low+high)/2;
416cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
417cd0e1443SSatish Balay         else              low  = t;
418cd0e1443SSatish Balay       }
419cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
420cd0e1443SSatish Balay         if (rp[i] > bcol) break;
421cd0e1443SSatish Balay         if (rp[i] == bcol) {
4227e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
423cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
424cd0e1443SSatish Balay           else                  *bap  = value;
425f1241b54SBarry Smith           goto noinsert1;
426cd0e1443SSatish Balay         }
427cd0e1443SSatish Balay       }
428c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
42911ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
430cd0e1443SSatish Balay       if (nrow >= rmax) {
431cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
432cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
433cd0e1443SSatish Balay         Scalar *new_a;
434cd0e1443SSatish Balay 
43511ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
43696854ed6SLois Curfman McInnes 
43796854ed6SLois Curfman McInnes         /* Malloc new storage space */
4387e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
439cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4407e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
441cd0e1443SSatish Balay         new_i   = new_j + new_nz;
442cd0e1443SSatish Balay 
443cd0e1443SSatish Balay         /* copy over old data into new slots */
444cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4457e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
446a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
447a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
448a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
449cd0e1443SSatish Balay                                                            len*sizeof(int));
450a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
451a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
452a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
453a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
454cd0e1443SSatish Balay         /* free up old matrix storage */
455cd0e1443SSatish Balay         PetscFree(a->a);
456cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
457cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
458cd0e1443SSatish Balay         a->singlemalloc = 1;
459cd0e1443SSatish Balay 
460a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
461cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4627e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
46318e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
464cd0e1443SSatish Balay         a->reallocs++;
465119a934aSSatish Balay         a->nz++;
466cd0e1443SSatish Balay       }
4677e67e3f9SSatish Balay       N = nrow++ - 1;
468cd0e1443SSatish Balay       /* shift up all the later entries in this row */
469cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
470cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4717e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
472cd0e1443SSatish Balay       }
4737e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
474cd0e1443SSatish Balay       rp[i]                      = bcol;
4757e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
476f1241b54SBarry Smith       noinsert1:;
477cd0e1443SSatish Balay       low = i;
478cd0e1443SSatish Balay     }
479cd0e1443SSatish Balay     ailen[brow] = nrow;
480cd0e1443SSatish Balay   }
481cd0e1443SSatish Balay   return 0;
482cd0e1443SSatish Balay }
483cd0e1443SSatish Balay 
4845615d1e5SSatish Balay #undef __FUNC__
48505a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
48692c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
48792c4ed94SBarry Smith {
48892c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4898a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
490dd9472c6SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
491dd9472c6SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
4920e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
49392c4ed94SBarry Smith 
4940e324ae4SSatish Balay   if (roworiented) {
4950e324ae4SSatish Balay     stepval = (n-1)*bs;
4960e324ae4SSatish Balay   } else {
4970e324ae4SSatish Balay     stepval = (m-1)*bs;
4980e324ae4SSatish Balay   }
49992c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
50092c4ed94SBarry Smith     row  = im[k];
50192c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50292c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
5038a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
50492c4ed94SBarry Smith #endif
50592c4ed94SBarry Smith     rp   = aj + ai[row];
50692c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
50792c4ed94SBarry Smith     rmax = imax[row];
50892c4ed94SBarry Smith     nrow = ailen[row];
50992c4ed94SBarry Smith     low  = 0;
51092c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
51192c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
51292c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5138a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
51492c4ed94SBarry Smith #endif
51592c4ed94SBarry Smith       col = in[l];
51692c4ed94SBarry Smith       if (roworiented) {
5170e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5180e324ae4SSatish Balay       } else {
5190e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
52092c4ed94SBarry Smith       }
52192c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
52292c4ed94SBarry Smith       while (high-low > 7) {
52392c4ed94SBarry Smith         t = (low+high)/2;
52492c4ed94SBarry Smith         if (rp[t] > col) high = t;
52592c4ed94SBarry Smith         else             low  = t;
52692c4ed94SBarry Smith       }
52792c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
52892c4ed94SBarry Smith         if (rp[i] > col) break;
52992c4ed94SBarry Smith         if (rp[i] == col) {
5308a84c255SSatish Balay           bap  = ap +  bs2*i;
5310e324ae4SSatish Balay           if (roworiented) {
5328a84c255SSatish Balay             if (is == ADD_VALUES) {
533dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
534dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5358a84c255SSatish Balay                   bap[jj] += *value++;
536dd9472c6SBarry Smith                 }
537dd9472c6SBarry Smith               }
5380e324ae4SSatish Balay             } else {
539dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
540dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5410e324ae4SSatish Balay                   bap[jj] = *value++;
5428a84c255SSatish Balay                 }
543dd9472c6SBarry Smith               }
544dd9472c6SBarry Smith             }
5450e324ae4SSatish Balay           } else {
5460e324ae4SSatish Balay             if (is == ADD_VALUES) {
547dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
548dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5490e324ae4SSatish Balay                   *bap++ += *value++;
550dd9472c6SBarry Smith                 }
551dd9472c6SBarry Smith               }
5520e324ae4SSatish Balay             } else {
553dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
554dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5550e324ae4SSatish Balay                   *bap++  = *value++;
5560e324ae4SSatish Balay                 }
5578a84c255SSatish Balay               }
558dd9472c6SBarry Smith             }
559dd9472c6SBarry Smith           }
560f1241b54SBarry Smith           goto noinsert2;
56192c4ed94SBarry Smith         }
56292c4ed94SBarry Smith       }
56389280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
56411ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
56592c4ed94SBarry Smith       if (nrow >= rmax) {
56692c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
56792c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
56892c4ed94SBarry Smith         Scalar *new_a;
56992c4ed94SBarry Smith 
57011ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
57189280ab3SLois Curfman McInnes 
57292c4ed94SBarry Smith         /* malloc new storage space */
57392c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
57492c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
57592c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
57692c4ed94SBarry Smith         new_i   = new_j + new_nz;
57792c4ed94SBarry Smith 
57892c4ed94SBarry Smith         /* copy over old data into new slots */
57992c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
58092c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
58192c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
58292c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
583dd9472c6SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
58492c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
58592c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
586dd9472c6SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
58792c4ed94SBarry Smith         /* free up old matrix storage */
58892c4ed94SBarry Smith         PetscFree(a->a);
58992c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
59092c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
59192c4ed94SBarry Smith         a->singlemalloc = 1;
59292c4ed94SBarry Smith 
59392c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
59492c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
59592c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
59692c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
59792c4ed94SBarry Smith         a->reallocs++;
59892c4ed94SBarry Smith         a->nz++;
59992c4ed94SBarry Smith       }
60092c4ed94SBarry Smith       N = nrow++ - 1;
60192c4ed94SBarry Smith       /* shift up all the later entries in this row */
60292c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
60392c4ed94SBarry Smith         rp[ii+1] = rp[ii];
60492c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
60592c4ed94SBarry Smith       }
60692c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
60792c4ed94SBarry Smith       rp[i] = col;
6088a84c255SSatish Balay       bap   = ap +  bs2*i;
6090e324ae4SSatish Balay       if (roworiented) {
610dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval) {
611dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs ) {
6120e324ae4SSatish Balay             bap[jj] = *value++;
613dd9472c6SBarry Smith           }
614dd9472c6SBarry Smith         }
6150e324ae4SSatish Balay       } else {
616dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval ) {
617dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++ ) {
6180e324ae4SSatish Balay             *bap++  = *value++;
6190e324ae4SSatish Balay           }
620dd9472c6SBarry Smith         }
621dd9472c6SBarry Smith       }
622f1241b54SBarry Smith       noinsert2:;
62392c4ed94SBarry Smith       low = i;
62492c4ed94SBarry Smith     }
62592c4ed94SBarry Smith     ailen[row] = nrow;
62692c4ed94SBarry Smith   }
62792c4ed94SBarry Smith   return 0;
62892c4ed94SBarry Smith }
62992c4ed94SBarry Smith 
63092c4ed94SBarry Smith #undef __FUNC__
631d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ"
6328f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6330b824a48SSatish Balay {
6340b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6350752156aSBarry Smith   if (m) *m = a->m;
6360752156aSBarry Smith   if (n) *n = a->n;
6370b824a48SSatish Balay   return 0;
6380b824a48SSatish Balay }
6390b824a48SSatish Balay 
6405615d1e5SSatish Balay #undef __FUNC__
641d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
6428f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6430b824a48SSatish Balay {
6440b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6450b824a48SSatish Balay   *m = 0; *n = a->m;
6460b824a48SSatish Balay   return 0;
6470b824a48SSatish Balay }
6480b824a48SSatish Balay 
6495615d1e5SSatish Balay #undef __FUNC__
6505615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6519867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6529867e207SSatish Balay {
6539867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6547e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6559867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6569867e207SSatish Balay 
6579867e207SSatish Balay   bs  = a->bs;
6589867e207SSatish Balay   ai  = a->i;
6599867e207SSatish Balay   aj  = a->j;
6609867e207SSatish Balay   aa  = a->a;
6619df24120SSatish Balay   bs2 = a->bs2;
6629867e207SSatish Balay 
663e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6649867e207SSatish Balay 
6659867e207SSatish Balay   bn  = row/bs;   /* Block number */
6669867e207SSatish Balay   bp  = row % bs; /* Block Position */
6679867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6689867e207SSatish Balay   *nz = bs*M;
6699867e207SSatish Balay 
6709867e207SSatish Balay   if (v) {
6719867e207SSatish Balay     *v = 0;
6729867e207SSatish Balay     if (*nz) {
6739867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6749867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6759867e207SSatish Balay         v_i  = *v + i*bs;
6767e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6777e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6789867e207SSatish Balay       }
6799867e207SSatish Balay     }
6809867e207SSatish Balay   }
6819867e207SSatish Balay 
6829867e207SSatish Balay   if (idx) {
6839867e207SSatish Balay     *idx = 0;
6849867e207SSatish Balay     if (*nz) {
6859867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6869867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6879867e207SSatish Balay         idx_i = *idx + i*bs;
6889867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6899867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6909867e207SSatish Balay       }
6919867e207SSatish Balay     }
6929867e207SSatish Balay   }
6939867e207SSatish Balay   return 0;
6949867e207SSatish Balay }
6959867e207SSatish Balay 
6965615d1e5SSatish Balay #undef __FUNC__
697d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6989867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6999867e207SSatish Balay {
7009867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
7019867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
7029867e207SSatish Balay   return 0;
7039867e207SSatish Balay }
704b6490206SBarry Smith 
7055615d1e5SSatish Balay #undef __FUNC__
7065615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
7078f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
708f2501298SSatish Balay {
709f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
710f2501298SSatish Balay   Mat         C;
711f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
7129df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
713f2501298SSatish Balay   Scalar      *array=a->a;
714f2501298SSatish Balay 
715f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
716e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
717f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
718f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
719a7c10996SSatish Balay 
720a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
721f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
722f2501298SSatish Balay   PetscFree(col);
723f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
724f2501298SSatish Balay   cols = rows + bs;
725f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
726f2501298SSatish Balay     cols[0] = i*bs;
727f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
728f2501298SSatish Balay     len = ai[i+1] - ai[i];
729f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
730f2501298SSatish Balay       rows[0] = (*aj++)*bs;
731f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
732f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
733f2501298SSatish Balay       array += bs2;
734f2501298SSatish Balay     }
735f2501298SSatish Balay   }
7361073c447SSatish Balay   PetscFree(rows);
737f2501298SSatish Balay 
7386d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7396d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
740f2501298SSatish Balay 
741f2501298SSatish Balay   if (B != PETSC_NULL) {
742f2501298SSatish Balay     *B = C;
743f2501298SSatish Balay   } else {
744f2501298SSatish Balay     /* This isn't really an in-place transpose */
745f2501298SSatish Balay     PetscFree(a->a);
746f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
747f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
748f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
749f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
750f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
751f2501298SSatish Balay     PetscFree(a);
752f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
753f2501298SSatish Balay     PetscHeaderDestroy(C);
754f2501298SSatish Balay   }
755f2501298SSatish Balay   return 0;
756f2501298SSatish Balay }
757f2501298SSatish Balay 
758f2501298SSatish Balay 
7595615d1e5SSatish Balay #undef __FUNC__
7605615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7618f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
762584200bdSSatish Balay {
763584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
764584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
765a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
76643ee02c3SBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
767584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
768584200bdSSatish Balay 
7696d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
770584200bdSSatish Balay 
77143ee02c3SBarry Smith   if (m) rmax = ailen[0];
772584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
773584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
774584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
775d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
776584200bdSSatish Balay     if (fshift) {
777a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
778584200bdSSatish Balay       N = ailen[i];
779584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
780584200bdSSatish Balay         ip[j-fshift] = ip[j];
7817e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
782584200bdSSatish Balay       }
783584200bdSSatish Balay     }
784584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
785584200bdSSatish Balay   }
786584200bdSSatish Balay   if (mbs) {
787584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
788584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
789584200bdSSatish Balay   }
790584200bdSSatish Balay   /* reset ilen and imax for each row */
791584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
792584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
793584200bdSSatish Balay   }
794a7c10996SSatish Balay   a->nz = ai[mbs];
795584200bdSSatish Balay 
796584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
797584200bdSSatish Balay   if (fshift && a->diag) {
798584200bdSSatish Balay     PetscFree(a->diag);
799584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
800584200bdSSatish Balay     a->diag = 0;
801584200bdSSatish Balay   }
8023d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
803219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
8043d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
805584200bdSSatish Balay            a->reallocs);
806d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
807e2f3b5e9SSatish Balay   a->reallocs          = 0;
8084e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
8094e220ebcSLois Curfman McInnes 
810584200bdSSatish Balay   return 0;
811584200bdSSatish Balay }
812584200bdSSatish Balay 
8135615d1e5SSatish Balay #undef __FUNC__
8145615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
8158f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
8162593348eSBarry Smith {
817b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8189df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8192593348eSBarry Smith   return 0;
8202593348eSBarry Smith }
8212593348eSBarry Smith 
8225615d1e5SSatish Balay #undef __FUNC__
823d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ"
824b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8252593348eSBarry Smith {
8262593348eSBarry Smith   Mat         A  = (Mat) obj;
827b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8282593348eSBarry Smith 
8292593348eSBarry Smith #if defined(PETSC_LOG)
8302593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8312593348eSBarry Smith #endif
8322593348eSBarry Smith   PetscFree(a->a);
8332593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8342593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8352593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8362593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8372593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
838de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8392593348eSBarry Smith   PetscFree(a);
840f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
841f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8422593348eSBarry Smith   return 0;
8432593348eSBarry Smith }
8442593348eSBarry Smith 
8455615d1e5SSatish Balay #undef __FUNC__
846d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ"
8478f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8482593348eSBarry Smith {
849b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8506d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8516d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8526d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
853219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8546d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
855c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
85696854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8576d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8586d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
859219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8606d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8616d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
86290f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8632b362799SSatish Balay            op == MAT_IGNORE_OFF_PROC_ENTRIES)
86494a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8656d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
866e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8672593348eSBarry Smith   else
868e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8692593348eSBarry Smith   return 0;
8702593348eSBarry Smith }
8712593348eSBarry Smith 
8722593348eSBarry Smith 
8732593348eSBarry Smith /* -------------------------------------------------------*/
8742593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8752593348eSBarry Smith /* -------------------------------------------------------*/
876b6490206SBarry Smith #include "pinclude/plapack.h"
877b6490206SBarry Smith 
8785615d1e5SSatish Balay #undef __FUNC__
8795615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
88039b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8812593348eSBarry Smith {
882b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
88339b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
884c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8852593348eSBarry Smith 
886c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
887c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
888b6490206SBarry Smith 
889119a934aSSatish Balay   idx   = a->j;
890119a934aSSatish Balay   v     = a->a;
891119a934aSSatish Balay   ii    = a->i;
892119a934aSSatish Balay 
893119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
894119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
895119a934aSSatish Balay     sum  = 0.0;
896119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8971a6a6d98SBarry Smith     z[i] = sum;
898119a934aSSatish Balay   }
899c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
900c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
90139b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
90239b95ed1SBarry Smith   return 0;
90339b95ed1SBarry Smith }
90439b95ed1SBarry Smith 
9055615d1e5SSatish Balay #undef __FUNC__
9065615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
90739b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
90839b95ed1SBarry Smith {
90939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
91039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
91139b95ed1SBarry Smith   register Scalar x1,x2;
91239b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
913c16cb8f2SBarry Smith   int             j,n;
91439b95ed1SBarry Smith 
915c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
916c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
91739b95ed1SBarry Smith 
91839b95ed1SBarry Smith   idx   = a->j;
91939b95ed1SBarry Smith   v     = a->a;
92039b95ed1SBarry Smith   ii    = a->i;
92139b95ed1SBarry Smith 
922119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
923119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
924119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
925119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
926119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
927119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
928119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
929119a934aSSatish Balay       v += 4;
930119a934aSSatish Balay     }
9311a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
932119a934aSSatish Balay     z += 2;
933119a934aSSatish Balay   }
934c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
935c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
93639b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
93739b95ed1SBarry Smith   return 0;
93839b95ed1SBarry Smith }
93939b95ed1SBarry Smith 
9405615d1e5SSatish Balay #undef __FUNC__
9415615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
94239b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
94339b95ed1SBarry Smith {
94439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
94539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
946c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
94739b95ed1SBarry Smith 
948c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
949c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
95039b95ed1SBarry Smith 
95139b95ed1SBarry Smith   idx   = a->j;
95239b95ed1SBarry Smith   v     = a->a;
95339b95ed1SBarry Smith   ii    = a->i;
95439b95ed1SBarry Smith 
955119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
956119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
957119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
958119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
959119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
960119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
961119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
962119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
963119a934aSSatish Balay       v += 9;
964119a934aSSatish Balay     }
9651a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
966119a934aSSatish Balay     z += 3;
967119a934aSSatish Balay   }
968c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
969c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
97039b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
97139b95ed1SBarry Smith   return 0;
97239b95ed1SBarry Smith }
97339b95ed1SBarry Smith 
9745615d1e5SSatish Balay #undef __FUNC__
9755615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
97639b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
97739b95ed1SBarry Smith {
97839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
97939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
98039b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
98139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
982c16cb8f2SBarry Smith   int             j,n;
98339b95ed1SBarry Smith 
984c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
985c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
98639b95ed1SBarry Smith 
98739b95ed1SBarry Smith   idx   = a->j;
98839b95ed1SBarry Smith   v     = a->a;
98939b95ed1SBarry Smith   ii    = a->i;
99039b95ed1SBarry Smith 
991119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
992119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
993119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
994119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
995119a934aSSatish Balay       xb = x + 4*(*idx++);
996119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
997119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
998119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
999119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1000119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1001119a934aSSatish Balay       v += 16;
1002119a934aSSatish Balay     }
10031a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1004119a934aSSatish Balay     z += 4;
1005119a934aSSatish Balay   }
1006c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1007c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
100839b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
100939b95ed1SBarry Smith   return 0;
101039b95ed1SBarry Smith }
101139b95ed1SBarry Smith 
10125615d1e5SSatish Balay #undef __FUNC__
10135615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
101439b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
101539b95ed1SBarry Smith {
101639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
101739b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
101839b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
1019c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
102039b95ed1SBarry Smith 
1021c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1022c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
102339b95ed1SBarry Smith 
102439b95ed1SBarry Smith   idx   = a->j;
102539b95ed1SBarry Smith   v     = a->a;
102639b95ed1SBarry Smith   ii    = a->i;
102739b95ed1SBarry Smith 
1028119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1029119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1030119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1031119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1032119a934aSSatish Balay       xb = x + 5*(*idx++);
1033119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1034119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1035119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1036119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1037119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1038119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1039119a934aSSatish Balay       v += 25;
1040119a934aSSatish Balay     }
10411a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1042119a934aSSatish Balay     z += 5;
1043119a934aSSatish Balay   }
1044c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1045c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
104639b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
104739b95ed1SBarry Smith   return 0;
104839b95ed1SBarry Smith }
104939b95ed1SBarry Smith 
10505615d1e5SSatish Balay #undef __FUNC__
105148e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
105248e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
105348e9ddb2SSatish Balay {
105448e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
105548e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
105648e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
105748e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
105848e9ddb2SSatish Balay 
105948e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
106048e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
106148e9ddb2SSatish Balay 
106248e9ddb2SSatish Balay   idx   = a->j;
106348e9ddb2SSatish Balay   v     = a->a;
106448e9ddb2SSatish Balay   ii    = a->i;
106548e9ddb2SSatish Balay 
106648e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
106748e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
106848e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
106948e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
107048e9ddb2SSatish Balay       xb = x + 7*(*idx++);
107148e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
107248e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
107348e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
107448e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
107548e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
107648e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
107748e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
107848e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
107948e9ddb2SSatish Balay       v += 49;
108048e9ddb2SSatish Balay     }
108148e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
108248e9ddb2SSatish Balay     z += 7;
108348e9ddb2SSatish Balay   }
108448e9ddb2SSatish Balay 
108548e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
108648e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
108748e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
108848e9ddb2SSatish Balay   return 0;
108948e9ddb2SSatish Balay }
109048e9ddb2SSatish Balay 
109148e9ddb2SSatish Balay #undef __FUNC__
10925615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
109339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
109439b95ed1SBarry Smith {
109539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
109639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1097c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1098c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1099c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
110039b95ed1SBarry Smith 
1101c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1102c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
110339b95ed1SBarry Smith 
110439b95ed1SBarry Smith   idx   = a->j;
110539b95ed1SBarry Smith   v     = a->a;
110639b95ed1SBarry Smith   ii    = a->i;
110739b95ed1SBarry Smith 
110839b95ed1SBarry Smith 
1109119a934aSSatish Balay   if (!a->mult_work) {
11103b547af2SSatish Balay     k = PetscMax(a->m,a->n);
11112b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1112119a934aSSatish Balay   }
1113119a934aSSatish Balay   work = a->mult_work;
1114119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1115119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1116119a934aSSatish Balay     ncols = n*bs;
1117119a934aSSatish Balay     workt = work;
1118119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1119119a934aSSatish Balay       xb = x + bs*(*idx++);
1120119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1121119a934aSSatish Balay       workt += bs;
1122119a934aSSatish Balay     }
11231a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1124119a934aSSatish Balay     v += n*bs2;
1125119a934aSSatish Balay     z += bs;
1126119a934aSSatish Balay   }
1127c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1128c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11291a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1130bb42667fSSatish Balay   return 0;
1131bb42667fSSatish Balay }
1132bb42667fSSatish Balay 
11335615d1e5SSatish Balay #undef __FUNC__
11345615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1135f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1136f44d3a6dSSatish Balay {
1137f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1138f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1139c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1140f44d3a6dSSatish Balay 
1141c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1142c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1143c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1144f44d3a6dSSatish Balay 
1145f44d3a6dSSatish Balay   idx   = a->j;
1146f44d3a6dSSatish Balay   v     = a->a;
1147f44d3a6dSSatish Balay   ii    = a->i;
1148f44d3a6dSSatish Balay 
1149f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1150f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1151f44d3a6dSSatish Balay     sum  = y[i];
1152f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1153f44d3a6dSSatish Balay     z[i] = sum;
1154f44d3a6dSSatish Balay   }
1155c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1156c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1157c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1158f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1159f44d3a6dSSatish Balay   return 0;
1160f44d3a6dSSatish Balay }
1161f44d3a6dSSatish Balay 
11625615d1e5SSatish Balay #undef __FUNC__
11635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1164f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1165f44d3a6dSSatish Balay {
1166f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1167f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1168f44d3a6dSSatish Balay   register Scalar x1,x2;
1169f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1170c16cb8f2SBarry Smith   int             j,n;
1171f44d3a6dSSatish Balay 
1172c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1173c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1174c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1175f44d3a6dSSatish Balay 
1176f44d3a6dSSatish Balay   idx   = a->j;
1177f44d3a6dSSatish Balay   v     = a->a;
1178f44d3a6dSSatish Balay   ii    = a->i;
1179f44d3a6dSSatish Balay 
1180f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1181f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1182f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1183f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1184f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1185f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1186f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1187f44d3a6dSSatish Balay       v += 4;
1188f44d3a6dSSatish Balay     }
1189f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1190f44d3a6dSSatish Balay     z += 2; y += 2;
1191f44d3a6dSSatish Balay   }
1192c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1193c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1194c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1195f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1196f44d3a6dSSatish Balay   return 0;
1197f44d3a6dSSatish Balay }
1198f44d3a6dSSatish Balay 
11995615d1e5SSatish Balay #undef __FUNC__
12005615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1201f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1202f44d3a6dSSatish Balay {
1203f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1204f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1205c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1206f44d3a6dSSatish Balay 
1207c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1208c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1209c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1210f44d3a6dSSatish Balay 
1211f44d3a6dSSatish Balay   idx   = a->j;
1212f44d3a6dSSatish Balay   v     = a->a;
1213f44d3a6dSSatish Balay   ii    = a->i;
1214f44d3a6dSSatish Balay 
1215f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1216f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1217f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1218f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1219f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1220f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1221f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1222f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1223f44d3a6dSSatish Balay       v += 9;
1224f44d3a6dSSatish Balay     }
1225f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1226f44d3a6dSSatish Balay     z += 3; y += 3;
1227f44d3a6dSSatish Balay   }
1228c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1229c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1230c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1231f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1232f44d3a6dSSatish Balay   return 0;
1233f44d3a6dSSatish Balay }
1234f44d3a6dSSatish Balay 
12355615d1e5SSatish Balay #undef __FUNC__
12365615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1237f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1238f44d3a6dSSatish Balay {
1239f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1240f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1241f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1242f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1243c16cb8f2SBarry Smith   int             j,n;
1244f44d3a6dSSatish Balay 
1245c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1246c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1247c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1248f44d3a6dSSatish Balay 
1249f44d3a6dSSatish Balay   idx   = a->j;
1250f44d3a6dSSatish Balay   v     = a->a;
1251f44d3a6dSSatish Balay   ii    = a->i;
1252f44d3a6dSSatish Balay 
1253f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1254f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1255f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1256f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1257f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1258f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1259f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1260f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1261f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1262f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1263f44d3a6dSSatish Balay       v += 16;
1264f44d3a6dSSatish Balay     }
1265f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1266f44d3a6dSSatish Balay     z += 4; y += 4;
1267f44d3a6dSSatish Balay   }
1268c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1269c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1270c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1271f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1272f44d3a6dSSatish Balay   return 0;
1273f44d3a6dSSatish Balay }
1274f44d3a6dSSatish Balay 
12755615d1e5SSatish Balay #undef __FUNC__
12765615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1277f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1278f44d3a6dSSatish Balay {
1279f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1280f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1281f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1282c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1283f44d3a6dSSatish Balay 
1284c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1285c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1286c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1287f44d3a6dSSatish Balay 
1288f44d3a6dSSatish Balay   idx   = a->j;
1289f44d3a6dSSatish Balay   v     = a->a;
1290f44d3a6dSSatish Balay   ii    = a->i;
1291f44d3a6dSSatish Balay 
1292f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1293f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1294f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1295f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1296f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1297f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1298f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1299f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1300f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1301f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1302f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1303f44d3a6dSSatish Balay       v += 25;
1304f44d3a6dSSatish Balay     }
1305f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1306f44d3a6dSSatish Balay     z += 5; y += 5;
1307f44d3a6dSSatish Balay   }
1308c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1309c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1310c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1311f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1312f44d3a6dSSatish Balay   return 0;
1313f44d3a6dSSatish Balay }
1314f44d3a6dSSatish Balay 
13155615d1e5SSatish Balay #undef __FUNC__
131648e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
131748e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
131848e9ddb2SSatish Balay {
131948e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
132048e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
132148e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
132248e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
132348e9ddb2SSatish Balay 
132448e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
132548e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
132648e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
132748e9ddb2SSatish Balay 
132848e9ddb2SSatish Balay   idx   = a->j;
132948e9ddb2SSatish Balay   v     = a->a;
133048e9ddb2SSatish Balay   ii    = a->i;
133148e9ddb2SSatish Balay 
133248e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
133348e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
133448e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
133548e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
133648e9ddb2SSatish Balay       xb = x + 7*(*idx++);
133748e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
133848e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
133948e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
134048e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
134148e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
134248e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
134348e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
134448e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
134548e9ddb2SSatish Balay       v += 49;
134648e9ddb2SSatish Balay     }
134748e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
134848e9ddb2SSatish Balay     z += 7; y += 7;
134948e9ddb2SSatish Balay   }
135048e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
135148e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
135248e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
135348e9ddb2SSatish Balay   PLogFlops(98*a->nz);
135448e9ddb2SSatish Balay   return 0;
135548e9ddb2SSatish Balay }
135648e9ddb2SSatish Balay 
135748e9ddb2SSatish Balay 
135848e9ddb2SSatish Balay #undef __FUNC__
13595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1360f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1361f44d3a6dSSatish Balay {
1362f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1363f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1364f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1365f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1366f44d3a6dSSatish Balay 
1367f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1368f44d3a6dSSatish Balay 
1369c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1370c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1371f44d3a6dSSatish Balay 
1372f44d3a6dSSatish Balay   idx   = a->j;
1373f44d3a6dSSatish Balay   v     = a->a;
1374f44d3a6dSSatish Balay   ii    = a->i;
1375f44d3a6dSSatish Balay 
1376f44d3a6dSSatish Balay 
1377f44d3a6dSSatish Balay   if (!a->mult_work) {
1378f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1379f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1380f44d3a6dSSatish Balay   }
1381f44d3a6dSSatish Balay   work = a->mult_work;
1382f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1383f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1384f44d3a6dSSatish Balay     ncols = n*bs;
1385f44d3a6dSSatish Balay     workt = work;
1386f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1387f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1388f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1389f44d3a6dSSatish Balay       workt += bs;
1390f44d3a6dSSatish Balay     }
1391f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1392f44d3a6dSSatish Balay     v += n*bs2;
1393f44d3a6dSSatish Balay     z += bs;
1394f44d3a6dSSatish Balay   }
1395c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1396c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1397f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1398f44d3a6dSSatish Balay   return 0;
1399f44d3a6dSSatish Balay }
1400f44d3a6dSSatish Balay 
14015615d1e5SSatish Balay #undef __FUNC__
14025615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
14038f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1404bb42667fSSatish Balay {
1405bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
14061a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1407bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1408bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
14099df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1410119a934aSSatish Balay 
1411119a934aSSatish Balay 
141290f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
141390f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1414bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1415bb42667fSSatish Balay 
1416119a934aSSatish Balay   idx   = a->j;
1417119a934aSSatish Balay   v     = a->a;
1418119a934aSSatish Balay   ii    = a->i;
1419119a934aSSatish Balay 
1420119a934aSSatish Balay   switch (bs) {
1421119a934aSSatish Balay   case 1:
1422119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1423119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1424119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1425119a934aSSatish Balay       ib = idx + ai[i];
1426119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1427bb1453f0SSatish Balay         rval    = ib[j];
1428bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1429119a934aSSatish Balay       }
1430119a934aSSatish Balay     }
1431119a934aSSatish Balay     break;
1432119a934aSSatish Balay   case 2:
1433119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1434119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1435119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1436119a934aSSatish Balay       ib = idx + ai[i];
1437119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1438119a934aSSatish Balay         rval      = ib[j]*2;
1439bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1440bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1441119a934aSSatish Balay         v += 4;
1442119a934aSSatish Balay       }
1443119a934aSSatish Balay     }
1444119a934aSSatish Balay     break;
1445119a934aSSatish Balay   case 3:
1446119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1447119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1448119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1449119a934aSSatish Balay       ib = idx + ai[i];
1450119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1451119a934aSSatish Balay         rval      = ib[j]*3;
1452bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1453bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1454bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1455119a934aSSatish Balay         v += 9;
1456119a934aSSatish Balay       }
1457119a934aSSatish Balay     }
1458119a934aSSatish Balay     break;
1459119a934aSSatish Balay   case 4:
1460119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1461119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1462119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1463119a934aSSatish Balay       ib = idx + ai[i];
1464119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1465119a934aSSatish Balay         rval      = ib[j]*4;
1466bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1467bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1468bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1469bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1470119a934aSSatish Balay         v += 16;
1471119a934aSSatish Balay       }
1472119a934aSSatish Balay     }
1473119a934aSSatish Balay     break;
1474119a934aSSatish Balay   case 5:
1475119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1476119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1477119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1478119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1479119a934aSSatish Balay       ib = idx + ai[i];
1480119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1481119a934aSSatish Balay         rval      = ib[j]*5;
1482bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1483bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1484bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1485bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1486bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1487119a934aSSatish Balay         v += 25;
1488119a934aSSatish Balay       }
1489119a934aSSatish Balay     }
1490119a934aSSatish Balay     break;
1491119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1492119a934aSSatish Balay     default: {
1493119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1494119a934aSSatish Balay       if (!a->mult_work) {
14953b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1496bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1497119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1498119a934aSSatish Balay       }
1499119a934aSSatish Balay       work = a->mult_work;
1500119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1501119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1502119a934aSSatish Balay         ncols = n*bs;
1503119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1504119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1505119a934aSSatish Balay         v += n*bs2;
1506119a934aSSatish Balay         x += bs;
1507119a934aSSatish Balay         workt = work;
1508119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1509119a934aSSatish Balay           zb = z + bs*(*idx++);
1510bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1511119a934aSSatish Balay           workt += bs;
1512119a934aSSatish Balay         }
1513119a934aSSatish Balay       }
1514119a934aSSatish Balay     }
1515119a934aSSatish Balay   }
15160419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15170419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1518faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1519faf6580fSSatish Balay   return 0;
1520faf6580fSSatish Balay }
15211c351548SSatish Balay 
15225615d1e5SSatish Balay #undef __FUNC__
15235615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15248f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1525faf6580fSSatish Balay {
1526faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1527faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1528faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1529faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1530faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1531faf6580fSSatish Balay 
1532faf6580fSSatish Balay 
1533faf6580fSSatish Balay 
153490f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
153590f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1536faf6580fSSatish Balay 
1537648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1538648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1539648c1d95SSatish Balay 
1540faf6580fSSatish Balay 
1541faf6580fSSatish Balay   idx   = a->j;
1542faf6580fSSatish Balay   v     = a->a;
1543faf6580fSSatish Balay   ii    = a->i;
1544faf6580fSSatish Balay 
1545faf6580fSSatish Balay   switch (bs) {
1546faf6580fSSatish Balay   case 1:
1547faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1548faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1549faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1550faf6580fSSatish Balay       ib = idx + ai[i];
1551faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1552faf6580fSSatish Balay         rval    = ib[j];
1553faf6580fSSatish Balay         z[rval] += *v++ * x1;
1554faf6580fSSatish Balay       }
1555faf6580fSSatish Balay     }
1556faf6580fSSatish Balay     break;
1557faf6580fSSatish Balay   case 2:
1558faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1559faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1560faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1561faf6580fSSatish Balay       ib = idx + ai[i];
1562faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1563faf6580fSSatish Balay         rval      = ib[j]*2;
1564faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1565faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1566faf6580fSSatish Balay         v += 4;
1567faf6580fSSatish Balay       }
1568faf6580fSSatish Balay     }
1569faf6580fSSatish Balay     break;
1570faf6580fSSatish Balay   case 3:
1571faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1572faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1573faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1574faf6580fSSatish Balay       ib = idx + ai[i];
1575faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1576faf6580fSSatish Balay         rval      = ib[j]*3;
1577faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1578faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1579faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1580faf6580fSSatish Balay         v += 9;
1581faf6580fSSatish Balay       }
1582faf6580fSSatish Balay     }
1583faf6580fSSatish Balay     break;
1584faf6580fSSatish Balay   case 4:
1585faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1586faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1587faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1588faf6580fSSatish Balay       ib = idx + ai[i];
1589faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1590faf6580fSSatish Balay         rval      = ib[j]*4;
1591faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1592faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1593faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1594faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1595faf6580fSSatish Balay         v += 16;
1596faf6580fSSatish Balay       }
1597faf6580fSSatish Balay     }
1598faf6580fSSatish Balay     break;
1599faf6580fSSatish Balay   case 5:
1600faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1601faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1602faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1603faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1604faf6580fSSatish Balay       ib = idx + ai[i];
1605faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1606faf6580fSSatish Balay         rval      = ib[j]*5;
1607faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1608faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1609faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1610faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1611faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1612faf6580fSSatish Balay         v += 25;
1613faf6580fSSatish Balay       }
1614faf6580fSSatish Balay     }
1615faf6580fSSatish Balay     break;
1616faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1617faf6580fSSatish Balay     default: {
1618faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1619faf6580fSSatish Balay       if (!a->mult_work) {
1620faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1621faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1622faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1623faf6580fSSatish Balay       }
1624faf6580fSSatish Balay       work = a->mult_work;
1625faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1626faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1627faf6580fSSatish Balay         ncols = n*bs;
1628faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1629faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1630faf6580fSSatish Balay         v += n*bs2;
1631faf6580fSSatish Balay         x += bs;
1632faf6580fSSatish Balay         workt = work;
1633faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1634faf6580fSSatish Balay           zb = z + bs*(*idx++);
1635faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1636faf6580fSSatish Balay           workt += bs;
1637faf6580fSSatish Balay         }
1638faf6580fSSatish Balay       }
1639faf6580fSSatish Balay     }
1640faf6580fSSatish Balay   }
1641faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1642faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1643faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16442593348eSBarry Smith   return 0;
16452593348eSBarry Smith }
16462593348eSBarry Smith 
16475615d1e5SSatish Balay #undef __FUNC__
1648d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ"
16498f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16502593348eSBarry Smith {
1651b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16524e220ebcSLois Curfman McInnes 
16534e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16544e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16554e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16564e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16574e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16584e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16594e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16604e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16614e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16624e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16634e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16644e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16654e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16664e220ebcSLois Curfman McInnes   if (A->factor) {
16674e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16684e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16694e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16704e220ebcSLois Curfman McInnes   } else {
16714e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16724e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16734e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16744e220ebcSLois Curfman McInnes   }
16752593348eSBarry Smith   return 0;
16762593348eSBarry Smith }
16772593348eSBarry Smith 
16785615d1e5SSatish Balay #undef __FUNC__
16795615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
16808f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
168191d316f6SSatish Balay {
168291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
168391d316f6SSatish Balay 
1684e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
168591d316f6SSatish Balay 
168691d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
168791d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1688a7c10996SSatish Balay       (a->nz != b->nz)) {
168991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169091d316f6SSatish Balay   }
169191d316f6SSatish Balay 
169291d316f6SSatish Balay   /* if the a->i are the same */
169391d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
169491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169591d316f6SSatish Balay   }
169691d316f6SSatish Balay 
169791d316f6SSatish Balay   /* if a->j are the same */
169891d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
169991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
170091d316f6SSatish Balay   }
170191d316f6SSatish Balay 
170291d316f6SSatish Balay   /* if a->a are the same */
170391d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
170491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
170591d316f6SSatish Balay   }
170691d316f6SSatish Balay   *flg = PETSC_TRUE;
170791d316f6SSatish Balay   return 0;
170891d316f6SSatish Balay 
170991d316f6SSatish Balay }
171091d316f6SSatish Balay 
17115615d1e5SSatish Balay #undef __FUNC__
17125615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17138f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
171491d316f6SSatish Balay {
171591d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17167e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
171717e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
171817e48fc4SSatish Balay 
17190513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
172017e48fc4SSatish Balay   bs   = a->bs;
172117e48fc4SSatish Balay   aa   = a->a;
172217e48fc4SSatish Balay   ai   = a->i;
172317e48fc4SSatish Balay   aj   = a->j;
172417e48fc4SSatish Balay   ambs = a->mbs;
17259df24120SSatish Balay   bs2  = a->bs2;
172691d316f6SSatish Balay 
172791d316f6SSatish Balay   VecSet(&zero,v);
172890f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1729e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
173017e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
173117e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
173217e48fc4SSatish Balay       if (aj[j] == i) {
173317e48fc4SSatish Balay         row  = i*bs;
17347e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17357e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
173691d316f6SSatish Balay         break;
173791d316f6SSatish Balay       }
173891d316f6SSatish Balay     }
173991d316f6SSatish Balay   }
174091d316f6SSatish Balay   return 0;
174191d316f6SSatish Balay }
174291d316f6SSatish Balay 
17435615d1e5SSatish Balay #undef __FUNC__
17445615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17458f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17469867e207SSatish Balay {
17479867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17489867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17497e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17509867e207SSatish Balay 
17519867e207SSatish Balay   ai  = a->i;
17529867e207SSatish Balay   aj  = a->j;
17539867e207SSatish Balay   aa  = a->a;
17549867e207SSatish Balay   m   = a->m;
17559867e207SSatish Balay   n   = a->n;
17569867e207SSatish Balay   bs  = a->bs;
17579867e207SSatish Balay   mbs = a->mbs;
17589df24120SSatish Balay   bs2 = a->bs2;
17599867e207SSatish Balay   if (ll) {
176090f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1761e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17629867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17639867e207SSatish Balay       M  = ai[i+1] - ai[i];
17649867e207SSatish Balay       li = l + i*bs;
17657e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17669867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17677e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17689867e207SSatish Balay           (*v++) *= li[k%bs];
17699867e207SSatish Balay         }
17709867e207SSatish Balay       }
17719867e207SSatish Balay     }
17729867e207SSatish Balay   }
17739867e207SSatish Balay 
17749867e207SSatish Balay   if (rr) {
177590f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1776e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17779867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17789867e207SSatish Balay       M  = ai[i+1] - ai[i];
17797e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17809867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17819867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17829867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17839867e207SSatish Balay           x = ri[k];
17849867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17859867e207SSatish Balay         }
17869867e207SSatish Balay       }
17879867e207SSatish Balay     }
17889867e207SSatish Balay   }
17899867e207SSatish Balay   return 0;
17909867e207SSatish Balay }
17919867e207SSatish Balay 
17929867e207SSatish Balay 
1793b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1794b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17952a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1796736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1797736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17981a6a6d98SBarry Smith 
17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
18031a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
18041a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
180548e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
18061a6a6d98SBarry Smith 
18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
18117fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
18127fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18132593348eSBarry Smith 
18145615d1e5SSatish Balay #undef __FUNC__
18155615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18168f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18172593348eSBarry Smith {
1818b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18192593348eSBarry Smith   Scalar      *v = a->a;
18202593348eSBarry Smith   double      sum = 0.0;
18219df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18222593348eSBarry Smith 
18232593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18240419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18252593348eSBarry Smith #if defined(PETSC_COMPLEX)
18262593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18272593348eSBarry Smith #else
18282593348eSBarry Smith       sum += (*v)*(*v); v++;
18292593348eSBarry Smith #endif
18302593348eSBarry Smith     }
18312593348eSBarry Smith     *norm = sqrt(sum);
18322593348eSBarry Smith   }
18332593348eSBarry Smith   else {
1834e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18352593348eSBarry Smith   }
18362593348eSBarry Smith   return 0;
18372593348eSBarry Smith }
18382593348eSBarry Smith 
18393eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
18402593348eSBarry Smith /*
18412593348eSBarry Smith      note: This can only work for identity for row and col. It would
18422593348eSBarry Smith    be good to check this and otherwise generate an error.
18432593348eSBarry Smith */
18445615d1e5SSatish Balay #undef __FUNC__
18455615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18468f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18472593348eSBarry Smith {
1848b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18492593348eSBarry Smith   Mat         outA;
1850de6a44a3SBarry Smith   int         ierr;
18512593348eSBarry Smith 
1852e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18532593348eSBarry Smith 
18542593348eSBarry Smith   outA          = inA;
18552593348eSBarry Smith   inA->factor   = FACTOR_LU;
18562593348eSBarry Smith   a->row        = row;
18572593348eSBarry Smith   a->col        = col;
18582593348eSBarry Smith 
1859eed86810SBarry Smith   if (!a->solve_work) {
1860de6a44a3SBarry Smith     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1861eed86810SBarry Smith     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1862eed86810SBarry Smith   }
18632593348eSBarry Smith 
18642593348eSBarry Smith   if (!a->diag) {
1865b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18662593348eSBarry Smith   }
18677fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18683eee16b0SBarry Smith 
18693eee16b0SBarry Smith   /*
18703eee16b0SBarry Smith       Blocksize 4 has a special faster solver for ILU(0) factorization
18713eee16b0SBarry Smith     with natural ordering
18723eee16b0SBarry Smith   */
18733eee16b0SBarry Smith   if (a->bs == 4) {
18743eee16b0SBarry Smith     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
18753eee16b0SBarry Smith   }
18763eee16b0SBarry Smith 
18772593348eSBarry Smith   return 0;
18782593348eSBarry Smith }
18792593348eSBarry Smith 
18805615d1e5SSatish Balay #undef __FUNC__
18815615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
18828f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18832593348eSBarry Smith {
1884b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18859df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1886b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1887b6490206SBarry Smith   PLogFlops(totalnz);
18882593348eSBarry Smith   return 0;
18892593348eSBarry Smith }
18902593348eSBarry Smith 
18915615d1e5SSatish Balay #undef __FUNC__
18925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18938f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18947e67e3f9SSatish Balay {
18957e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18967e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1897a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18989df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18997e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
19007e67e3f9SSatish Balay 
19017e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
19027e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1903e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1904e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1905a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
19067e67e3f9SSatish Balay     nrow = ailen[brow];
19077e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1908e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1909e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1910a7c10996SSatish Balay       col  = in[l] ;
19117e67e3f9SSatish Balay       bcol = col/bs;
19127e67e3f9SSatish Balay       cidx = col%bs;
19137e67e3f9SSatish Balay       ridx = row%bs;
19147e67e3f9SSatish Balay       high = nrow;
19157e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
19167e67e3f9SSatish Balay       while (high-low > 5) {
19177e67e3f9SSatish Balay         t = (low+high)/2;
19187e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
19197e67e3f9SSatish Balay         else             low  = t;
19207e67e3f9SSatish Balay       }
19217e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
19227e67e3f9SSatish Balay         if (rp[i] > bcol) break;
19237e67e3f9SSatish Balay         if (rp[i] == bcol) {
19247e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
19257e67e3f9SSatish Balay           goto finished;
19267e67e3f9SSatish Balay         }
19277e67e3f9SSatish Balay       }
19287e67e3f9SSatish Balay       *v++ = zero;
19297e67e3f9SSatish Balay       finished:;
19307e67e3f9SSatish Balay     }
19317e67e3f9SSatish Balay   }
19327e67e3f9SSatish Balay   return 0;
19337e67e3f9SSatish Balay }
19347e67e3f9SSatish Balay 
19355615d1e5SSatish Balay #undef __FUNC__
1936d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
19378f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19385a838052SSatish Balay {
19395a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19405a838052SSatish Balay   *bs = baij->bs;
19415a838052SSatish Balay   return 0;
19425a838052SSatish Balay }
19435a838052SSatish Balay 
1944d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19455615d1e5SSatish Balay #undef __FUNC__
19465615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1947d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1948d9b7c43dSSatish Balay {
1949d9b7c43dSSatish Balay   int i,row;
1950d9b7c43dSSatish Balay   row = idx[0];
1951d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1952d9b7c43dSSatish Balay 
1953d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1954d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1955d9b7c43dSSatish Balay   }
1956d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1957d9b7c43dSSatish Balay   return 0;
1958d9b7c43dSSatish Balay }
1959d9b7c43dSSatish Balay 
19605615d1e5SSatish Balay #undef __FUNC__
19615615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19628f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1963d9b7c43dSSatish Balay {
1964d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1965d9b7c43dSSatish Balay   IS          is_local;
1966d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1967d9b7c43dSSatish Balay   PetscTruth  flg;
1968d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1969d9b7c43dSSatish Balay 
1970d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1971d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1972d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1973537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1974d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1975d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1976d9b7c43dSSatish Balay 
1977d9b7c43dSSatish Balay   i = 0;
1978d9b7c43dSSatish Balay   while (i < is_n) {
1979e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1980d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1981d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1982d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1983d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1984d9b7c43dSSatish Balay       i += bs;
1985d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1986d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1987d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1988d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1989d9b7c43dSSatish Balay         aa[0] = zero;
1990d9b7c43dSSatish Balay         aa+=bs;
1991d9b7c43dSSatish Balay       }
1992d9b7c43dSSatish Balay       i++;
1993d9b7c43dSSatish Balay     }
1994d9b7c43dSSatish Balay   }
1995d9b7c43dSSatish Balay   if (diag) {
1996d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1997d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1998d9b7c43dSSatish Balay     }
1999d9b7c43dSSatish Balay   }
2000d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
2001d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2002d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
20039a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2004d9b7c43dSSatish Balay 
2005d9b7c43dSSatish Balay   return 0;
2006d9b7c43dSSatish Balay }
20071c351548SSatish Balay 
20085615d1e5SSatish Balay #undef __FUNC__
2009d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2010ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
2011ba4ca20aSSatish Balay {
2012ba4ca20aSSatish Balay   static int called = 0;
2013ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
2014ba4ca20aSSatish Balay 
2015ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
2016ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2017ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2018ba4ca20aSSatish Balay   return 0;
2019ba4ca20aSSatish Balay }
2020d9b7c43dSSatish Balay 
20212593348eSBarry Smith /* -------------------------------------------------------------------*/
2022cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
20239867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2024f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2025faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20261a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2027b6490206SBarry Smith        0,0,
2028de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2029b6490206SBarry Smith        0,
2030f2501298SSatish Balay        MatTranspose_SeqBAIJ,
203117e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20329867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2033584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2034b6490206SBarry Smith        0,
2035d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20367fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2037b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2038de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2039d402145bSBarry Smith        0,0,
2040b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2041b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2042af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20437e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2044ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20453b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20463b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
204792c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
204892c4ed94SBarry Smith        0,0,0,0,0,0,
204992c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20502593348eSBarry Smith 
20515615d1e5SSatish Balay #undef __FUNC__
20525615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20532593348eSBarry Smith /*@C
205444cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
205544cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
205644cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20572bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20582bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20592593348eSBarry Smith 
20602593348eSBarry Smith    Input Parameters:
2061029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2062b6490206SBarry Smith .  bs - size of block
20632593348eSBarry Smith .  m - number of rows
20642593348eSBarry Smith .  n - number of columns
2065b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20662bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20672bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20682593348eSBarry Smith 
20692593348eSBarry Smith    Output Parameter:
20702593348eSBarry Smith .  A - the matrix
20712593348eSBarry Smith 
20720513a670SBarry Smith    Options Database Keys:
20730513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20740effe34fSLois Curfman McInnes $                     block calculations (much slower)
20750513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20760513a670SBarry Smith 
20772593348eSBarry Smith    Notes:
207844cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20792593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
208044cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20812593348eSBarry Smith 
20822593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20832593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20842593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20856da5968aSLois Curfman McInnes    matrices.
20862593348eSBarry Smith 
208744cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20882593348eSBarry Smith @*/
2089b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20902593348eSBarry Smith {
20912593348eSBarry Smith   Mat         B;
2092b6490206SBarry Smith   Mat_SeqBAIJ *b;
20933b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20943b2fbd54SBarry Smith 
20953b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2096e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2097b6490206SBarry Smith 
20980513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20990513a670SBarry Smith 
2100f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2101e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
21022593348eSBarry Smith 
21032593348eSBarry Smith   *A = 0;
2104d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
21052593348eSBarry Smith   PLogObjectCreate(B);
2106b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
210744cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
21082593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
21091a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
21101a6a6d98SBarry Smith   if (!flg) {
21117fc0212eSBarry Smith     switch (bs) {
21127fc0212eSBarry Smith     case 1:
21137fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21141a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
211539b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2116f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
21177fc0212eSBarry Smith       break;
21184eeb42bcSBarry Smith     case 2:
21194eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
21201a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
212139b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2122f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
21236d84be18SBarry Smith       break;
2124f327dd97SBarry Smith     case 3:
2125f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21261a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
212739b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2128f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21294eeb42bcSBarry Smith       break;
21306d84be18SBarry Smith     case 4:
21316d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21321a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
213339b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2134f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21356d84be18SBarry Smith       break;
21366d84be18SBarry Smith     case 5:
21376d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21381a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
213939b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2140f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21416d84be18SBarry Smith       break;
214248e9ddb2SSatish Balay     case 7:
214348e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
214448e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
214548e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
214648e9ddb2SSatish Balay       break;
21477fc0212eSBarry Smith     }
21481a6a6d98SBarry Smith   }
2149b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2150b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21512593348eSBarry Smith   B->factor           = 0;
21522593348eSBarry Smith   B->lupivotthreshold = 1.0;
215390f02eecSBarry Smith   B->mapping          = 0;
21542593348eSBarry Smith   b->row              = 0;
21552593348eSBarry Smith   b->col              = 0;
21562593348eSBarry Smith   b->reallocs         = 0;
21572593348eSBarry Smith 
215844cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
215944cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2160b6490206SBarry Smith   b->mbs     = mbs;
2161f2501298SSatish Balay   b->nbs     = nbs;
2162b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21632593348eSBarry Smith   if (nnz == PETSC_NULL) {
2164119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21652593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2166b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2167b6490206SBarry Smith     nz = nz*mbs;
21682593348eSBarry Smith   }
21692593348eSBarry Smith   else {
21702593348eSBarry Smith     nz = 0;
2171b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21722593348eSBarry Smith   }
21732593348eSBarry Smith 
21742593348eSBarry Smith   /* allocate the matrix space */
21757e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21762593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21777e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21787e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21792593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21802593348eSBarry Smith   b->i  = b->j + nz;
21812593348eSBarry Smith   b->singlemalloc = 1;
21822593348eSBarry Smith 
2183de6a44a3SBarry Smith   b->i[0] = 0;
2184b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21852593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21862593348eSBarry Smith   }
21872593348eSBarry Smith 
2188b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2189b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2190f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2191b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21922593348eSBarry Smith 
2193b6490206SBarry Smith   b->bs               = bs;
21949df24120SSatish Balay   b->bs2              = bs2;
2195b6490206SBarry Smith   b->mbs              = mbs;
21962593348eSBarry Smith   b->nz               = 0;
219718e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21982593348eSBarry Smith   b->sorted           = 0;
21992593348eSBarry Smith   b->roworiented      = 1;
22002593348eSBarry Smith   b->nonew            = 0;
22012593348eSBarry Smith   b->diag             = 0;
22022593348eSBarry Smith   b->solve_work       = 0;
2203de6a44a3SBarry Smith   b->mult_work        = 0;
22042593348eSBarry Smith   b->spptr            = 0;
22054e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
22064e220ebcSLois Curfman McInnes 
22072593348eSBarry Smith   *A = B;
22082593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
22092593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
22102593348eSBarry Smith   return 0;
22112593348eSBarry Smith }
22122593348eSBarry Smith 
22135615d1e5SSatish Balay #undef __FUNC__
22145615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2215b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
22162593348eSBarry Smith {
22172593348eSBarry Smith   Mat         C;
2218b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
22199df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2220de6a44a3SBarry Smith 
2221e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
22222593348eSBarry Smith 
22232593348eSBarry Smith   *B = 0;
2224d4bb536fSBarry Smith   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
22252593348eSBarry Smith   PLogObjectCreate(C);
2226b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22272593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2228b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2229b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22302593348eSBarry Smith   C->factor     = A->factor;
22312593348eSBarry Smith   c->row        = 0;
22322593348eSBarry Smith   c->col        = 0;
22332593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22342593348eSBarry Smith 
223544cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
223644cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
223744cd7ae7SLois Curfman McInnes   C->M          = a->m;
223844cd7ae7SLois Curfman McInnes   C->N          = a->n;
223944cd7ae7SLois Curfman McInnes 
2240b6490206SBarry Smith   c->bs         = a->bs;
22419df24120SSatish Balay   c->bs2        = a->bs2;
2242b6490206SBarry Smith   c->mbs        = a->mbs;
2243b6490206SBarry Smith   c->nbs        = a->nbs;
22442593348eSBarry Smith 
2245b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2246b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2247b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22482593348eSBarry Smith     c->imax[i] = a->imax[i];
22492593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22502593348eSBarry Smith   }
22512593348eSBarry Smith 
22522593348eSBarry Smith   /* allocate the matrix space */
22532593348eSBarry Smith   c->singlemalloc = 1;
22547e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22552593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22567e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2257de6a44a3SBarry Smith   c->i  = c->j + nz;
2258b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2259b6490206SBarry Smith   if (mbs > 0) {
2260de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22612593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22627e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22632593348eSBarry Smith     }
22642593348eSBarry Smith   }
22652593348eSBarry Smith 
2266f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
22672593348eSBarry Smith   c->sorted      = a->sorted;
22682593348eSBarry Smith   c->roworiented = a->roworiented;
22692593348eSBarry Smith   c->nonew       = a->nonew;
22702593348eSBarry Smith 
22712593348eSBarry Smith   if (a->diag) {
2272b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2273b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2274b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22752593348eSBarry Smith       c->diag[i] = a->diag[i];
22762593348eSBarry Smith     }
22772593348eSBarry Smith   }
22782593348eSBarry Smith   else c->diag          = 0;
22792593348eSBarry Smith   c->nz                 = a->nz;
22802593348eSBarry Smith   c->maxnz              = a->maxnz;
22812593348eSBarry Smith   c->solve_work         = 0;
22822593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22837fc0212eSBarry Smith   c->mult_work          = 0;
22842593348eSBarry Smith   *B = C;
22852593348eSBarry Smith   return 0;
22862593348eSBarry Smith }
22872593348eSBarry Smith 
22885615d1e5SSatish Balay #undef __FUNC__
22895615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
229019bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22912593348eSBarry Smith {
2292b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22932593348eSBarry Smith   Mat          B;
2294de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2295b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
229635aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2297a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2298b6490206SBarry Smith   Scalar       *aa;
229919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
23002593348eSBarry Smith 
23011a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2302de6a44a3SBarry Smith   bs2  = bs*bs;
2303b6490206SBarry Smith 
23042593348eSBarry Smith   MPI_Comm_size(comm,&size);
2305e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
230690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
23070752156aSBarry Smith   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
2308e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
23092593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
23102593348eSBarry Smith 
2311e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
231235aab85fSBarry Smith 
231335aab85fSBarry Smith   /*
231435aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
231535aab85fSBarry Smith     divisible by the blocksize
231635aab85fSBarry Smith   */
2317b6490206SBarry Smith   mbs        = M/bs;
231835aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
231935aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
232035aab85fSBarry Smith   else                  mbs++;
232135aab85fSBarry Smith   if (extra_rows) {
2322537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
232335aab85fSBarry Smith   }
2324b6490206SBarry Smith 
23252593348eSBarry Smith   /* read in row lengths */
232635aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
23270752156aSBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
232835aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23292593348eSBarry Smith 
2330b6490206SBarry Smith   /* read in column indices */
233135aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
23320752156aSBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
233335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2334b6490206SBarry Smith 
2335b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2336b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2337b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
233835aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
233935aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
234035aab85fSBarry Smith   masked = mask + mbs;
2341b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2342b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
234335aab85fSBarry Smith     nmask = 0;
2344b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2345b6490206SBarry Smith       kmax = rowlengths[rowcount];
2346b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
234735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
234835aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2349b6490206SBarry Smith       }
2350b6490206SBarry Smith       rowcount++;
2351b6490206SBarry Smith     }
235235aab85fSBarry Smith     browlengths[i] += nmask;
235335aab85fSBarry Smith     /* zero out the mask elements we set */
235435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2355b6490206SBarry Smith   }
2356b6490206SBarry Smith 
23572593348eSBarry Smith   /* create our matrix */
23583eee16b0SBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
23592593348eSBarry Smith   B = *A;
2360b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23612593348eSBarry Smith 
23622593348eSBarry Smith   /* set matrix "i" values */
2363de6a44a3SBarry Smith   a->i[0] = 0;
2364b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2365b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2366b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23672593348eSBarry Smith   }
2368b6490206SBarry Smith   a->nz         = 0;
2369b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23702593348eSBarry Smith 
2371b6490206SBarry Smith   /* read in nonzero values */
237235aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
23730752156aSBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
237435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2375b6490206SBarry Smith 
2376b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2377b6490206SBarry Smith   nzcount = 0; jcount = 0;
2378b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2379b6490206SBarry Smith     nzcountb = nzcount;
238035aab85fSBarry Smith     nmask    = 0;
2381b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2382b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2383b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
238435aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
238535aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2386b6490206SBarry Smith       }
2387b6490206SBarry Smith       rowcount++;
2388b6490206SBarry Smith     }
2389de6a44a3SBarry Smith     /* sort the masked values */
239077c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2391de6a44a3SBarry Smith 
2392b6490206SBarry Smith     /* set "j" values into matrix */
2393b6490206SBarry Smith     maskcount = 1;
239435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
239535aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2396de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2397b6490206SBarry Smith     }
2398b6490206SBarry Smith     /* set "a" values into matrix */
2399de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2400b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2401b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2402b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2403de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2404de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2405de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2406de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2407b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2408b6490206SBarry Smith       }
2409b6490206SBarry Smith     }
241035aab85fSBarry Smith     /* zero out the mask elements we set */
241135aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2412b6490206SBarry Smith   }
2413e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2414b6490206SBarry Smith 
2415b6490206SBarry Smith   PetscFree(rowlengths);
2416b6490206SBarry Smith   PetscFree(browlengths);
2417b6490206SBarry Smith   PetscFree(aa);
2418b6490206SBarry Smith   PetscFree(jj);
2419b6490206SBarry Smith   PetscFree(mask);
2420b6490206SBarry Smith 
2421b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2422de6a44a3SBarry Smith 
2423*9c01be13SBarry Smith   ierr = MatView_Private(B); CHKERRQ(ierr);
24242593348eSBarry Smith   return 0;
24252593348eSBarry Smith }
24262593348eSBarry Smith 
24272593348eSBarry Smith 
24282593348eSBarry Smith 
2429