xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0752156a28ac8f8e9dfaef7ce98457a01bf27fb6)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*0752156aSBarry Smith static char vcid[] = "$Id: baij.c,v 1.111 1997/08/29 17:15:56 curfman Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
93369ce9aSBarry Smith 
103369ce9aSBarry Smith #include "pinclude/pviewer.h"
113369ce9aSBarry Smith #include "sys.h"
1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
131a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
141a6a6d98SBarry Smith #include "src/inline/spops.h"
152593348eSBarry Smith #include "petsc.h"
163b547af2SSatish Balay 
172593348eSBarry Smith 
18de6a44a3SBarry Smith /*
19de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
20de6a44a3SBarry Smith */
21de6a44a3SBarry Smith 
225615d1e5SSatish Balay #undef __FUNC__
235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ"
24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
25de6a44a3SBarry Smith {
26de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
277fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
28de6a44a3SBarry Smith 
29de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
30de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
317fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
32de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
33de6a44a3SBarry Smith       if (a->j[j] == i) {
34de6a44a3SBarry Smith         diag[i] = j;
35de6a44a3SBarry Smith         break;
36de6a44a3SBarry Smith       }
37de6a44a3SBarry Smith     }
38de6a44a3SBarry Smith   }
39de6a44a3SBarry Smith   a->diag = diag;
40de6a44a3SBarry Smith   return 0;
41de6a44a3SBarry Smith }
422593348eSBarry Smith 
432593348eSBarry Smith 
443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
453b2fbd54SBarry Smith 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
503b2fbd54SBarry Smith {
513b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
523b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
533b2fbd54SBarry Smith 
543b2fbd54SBarry Smith   *nn = n;
553b2fbd54SBarry Smith   if (!ia) return 0;
563b2fbd54SBarry Smith   if (symmetric) {
573b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
583b2fbd54SBarry Smith   } else if (oshift == 1) {
593b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
603b2fbd54SBarry Smith     int nz = a->i[n] + 1;
613b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
623b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
633b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
643b2fbd54SBarry Smith   } else {
653b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
663b2fbd54SBarry Smith   }
673b2fbd54SBarry Smith 
683b2fbd54SBarry Smith   return 0;
693b2fbd54SBarry Smith }
703b2fbd54SBarry Smith 
715615d1e5SSatish Balay #undef __FUNC__
72d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
743b2fbd54SBarry Smith                                 PetscTruth *done)
753b2fbd54SBarry Smith {
763b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
773b2fbd54SBarry Smith   int         i,n = a->mbs;
783b2fbd54SBarry Smith 
793b2fbd54SBarry Smith   if (!ia) return 0;
803b2fbd54SBarry Smith   if (symmetric) {
813b2fbd54SBarry Smith     PetscFree(*ia);
823b2fbd54SBarry Smith     PetscFree(*ja);
83af5da2bfSBarry Smith   } else if (oshift == 1) {
843b2fbd54SBarry Smith     int nz = a->i[n];
853b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
863b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
873b2fbd54SBarry Smith   }
883b2fbd54SBarry Smith   return 0;
893b2fbd54SBarry Smith }
903b2fbd54SBarry Smith 
913b2fbd54SBarry Smith 
925615d1e5SSatish Balay #undef __FUNC__
93d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary"
94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
952593348eSBarry Smith {
96b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
979df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
98b6490206SBarry Smith   Scalar      *aa;
992593348eSBarry Smith 
10090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1012593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1022593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1033b2fbd54SBarry Smith 
1042593348eSBarry Smith   col_lens[1] = a->m;
1052593348eSBarry Smith   col_lens[2] = a->n;
1067e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1072593348eSBarry Smith 
1082593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
109b6490206SBarry Smith   count = 0;
110b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
111b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
112b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
113b6490206SBarry Smith     }
1142593348eSBarry Smith   }
115*0752156aSBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
1162593348eSBarry Smith   PetscFree(col_lens);
1172593348eSBarry Smith 
1182593348eSBarry Smith   /* store column indices (zero start index) */
11966963ce1SSatish Balay   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
120b6490206SBarry Smith   count = 0;
121b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
122b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
123b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
124b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
125b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1262593348eSBarry Smith         }
1272593348eSBarry Smith       }
128b6490206SBarry Smith     }
129b6490206SBarry Smith   }
130*0752156aSBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
131b6490206SBarry Smith   PetscFree(jj);
1322593348eSBarry Smith 
1332593348eSBarry Smith   /* store nonzero values */
13466963ce1SSatish Balay   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
135b6490206SBarry Smith   count = 0;
136b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
137b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
138b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
139b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1407e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
141b6490206SBarry Smith         }
142b6490206SBarry Smith       }
143b6490206SBarry Smith     }
144b6490206SBarry Smith   }
145*0752156aSBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
146b6490206SBarry Smith   PetscFree(aa);
1472593348eSBarry Smith   return 0;
1482593348eSBarry Smith }
1492593348eSBarry Smith 
1505615d1e5SSatish Balay #undef __FUNC__
151d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII"
152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1532593348eSBarry Smith {
154b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1562593348eSBarry Smith   FILE        *fd;
1572593348eSBarry Smith   char        *outputname;
1582593348eSBarry Smith 
15990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1602593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
162639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1637ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1642593348eSBarry Smith   }
165639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
166e3372554SBarry Smith     SETERRQ(1,0,"Matlab format not supported");
1672593348eSBarry Smith   }
168639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
175766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
178766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
179766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
180766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18344cd7ae7SLois Curfman McInnes #else
18444cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18544cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18644cd7ae7SLois Curfman McInnes #endif
18744cd7ae7SLois Curfman McInnes           }
18844cd7ae7SLois Curfman McInnes         }
18944cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19044cd7ae7SLois Curfman McInnes       }
19144cd7ae7SLois Curfman McInnes     }
19244cd7ae7SLois Curfman McInnes   }
1932593348eSBarry Smith   else {
194b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
195b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
196b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
197b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
200766eeae4SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) > 0.0) {
20188685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2027e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
204766eeae4SLois Curfman McInnes           else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) {
205766eeae4SLois Curfman McInnes             fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
206766eeae4SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j]));
207766eeae4SLois Curfman McInnes           }
20888685aaeSLois Curfman McInnes           else {
2097e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
21088685aaeSLois Curfman McInnes           }
21188685aaeSLois Curfman McInnes #else
2127e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
21388685aaeSLois Curfman McInnes #endif
2142593348eSBarry Smith           }
2152593348eSBarry Smith         }
2162593348eSBarry Smith         fprintf(fd,"\n");
2172593348eSBarry Smith       }
2182593348eSBarry Smith     }
219b6490206SBarry Smith   }
2202593348eSBarry Smith   fflush(fd);
2212593348eSBarry Smith   return 0;
2222593348eSBarry Smith }
2232593348eSBarry Smith 
2245615d1e5SSatish Balay #undef __FUNC__
225d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw"
2263270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2273270192aSSatish Balay {
2283270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2293270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2303270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2313270192aSSatish Balay   Scalar       *aa;
2323270192aSSatish Balay   Draw         draw;
2333270192aSSatish Balay   DrawButton   button;
2343270192aSSatish Balay   PetscTruth   isnull;
2353270192aSSatish Balay 
2363270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2373270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2383270192aSSatish Balay 
2393270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2403270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2413270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2423270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2433270192aSSatish Balay   color = DRAW_BLUE;
2443270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2453270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2463270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2473270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2483270192aSSatish Balay       aa = a->a + j*bs2;
2493270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2503270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2513270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
252b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2533270192aSSatish Balay         }
2543270192aSSatish Balay       }
2553270192aSSatish Balay     }
2563270192aSSatish Balay   }
2573270192aSSatish Balay   color = DRAW_CYAN;
2583270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2593270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2603270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2613270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2623270192aSSatish Balay       aa = a->a + j*bs2;
2633270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2643270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2653270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
266b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2673270192aSSatish Balay         }
2683270192aSSatish Balay       }
2693270192aSSatish Balay     }
2703270192aSSatish Balay   }
2713270192aSSatish Balay 
2723270192aSSatish Balay   color = DRAW_RED;
2733270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2743270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2753270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2763270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2773270192aSSatish Balay       aa = a->a + j*bs2;
2783270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2793270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2803270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
281b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
2823270192aSSatish Balay         }
2833270192aSSatish Balay       }
2843270192aSSatish Balay     }
2853270192aSSatish Balay   }
2863270192aSSatish Balay 
2873270192aSSatish Balay   DrawFlush(draw);
2883270192aSSatish Balay   DrawGetPause(draw,&pause);
2893270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2903270192aSSatish Balay 
2913270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2923270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2933270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2943270192aSSatish Balay     DrawClear(draw);
2953270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2963270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2973270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2983270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2993270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
3003270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
3013270192aSSatish Balay     w *= scale; h *= scale;
3023270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
3033270192aSSatish Balay     color = DRAW_BLUE;
3043270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3053270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3063270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3073270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3083270192aSSatish Balay         aa = a->a + j*bs2;
3093270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3103270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3113270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
312b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3133270192aSSatish Balay           }
3143270192aSSatish Balay         }
3153270192aSSatish Balay       }
3163270192aSSatish Balay     }
3173270192aSSatish Balay     color = DRAW_CYAN;
3183270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3193270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3203270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3213270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3223270192aSSatish Balay         aa = a->a + j*bs2;
3233270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3243270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3253270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
326b34f160eSSatish Balay           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3273270192aSSatish Balay           }
3283270192aSSatish Balay         }
3293270192aSSatish Balay       }
3303270192aSSatish Balay     }
3313270192aSSatish Balay 
3323270192aSSatish Balay     color = DRAW_RED;
3333270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3343270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3353270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3363270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3373270192aSSatish Balay         aa = a->a + j*bs2;
3383270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3393270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3403270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
341b34f160eSSatish Balay             DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
3423270192aSSatish Balay           }
3433270192aSSatish Balay         }
3443270192aSSatish Balay       }
3453270192aSSatish Balay     }
3463270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3473270192aSSatish Balay   }
3483270192aSSatish Balay   return 0;
3493270192aSSatish Balay }
3503270192aSSatish Balay 
3515615d1e5SSatish Balay #undef __FUNC__
352d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ"
3538f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3542593348eSBarry Smith {
3552593348eSBarry Smith   Mat         A = (Mat) obj;
35619bcc07fSBarry Smith   ViewerType  vtype;
35719bcc07fSBarry Smith   int         ierr;
3582593348eSBarry Smith 
35919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
361e3372554SBarry Smith     SETERRQ(1,0,"Matlab viewer not supported");
3622593348eSBarry Smith   }
36319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
364b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3652593348eSBarry Smith   }
36619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
367b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3682593348eSBarry Smith   }
36919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3703270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3712593348eSBarry Smith   }
3722593348eSBarry Smith   return 0;
3732593348eSBarry Smith }
374b6490206SBarry Smith 
375119a934aSSatish Balay #define CHUNKSIZE  10
376cd0e1443SSatish Balay 
3775615d1e5SSatish Balay #undef __FUNC__
3785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ"
379639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
380cd0e1443SSatish Balay {
381cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
382cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
383cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
384a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3859df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
386cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
387cd0e1443SSatish Balay 
388cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
389cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3903b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
391e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
392e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
3933b2fbd54SBarry Smith #endif
3942c3acbe9SBarry Smith     rp   = aj + ai[brow];
3952c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
3962c3acbe9SBarry Smith     rmax = imax[brow];
3972c3acbe9SBarry Smith     nrow = ailen[brow];
398cd0e1443SSatish Balay     low  = 0;
399cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
4003b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
401e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
402e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
4033b2fbd54SBarry Smith #endif
404a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
405cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
406cd0e1443SSatish Balay       if (roworiented) {
407cd0e1443SSatish Balay         value = *v++;
408cd0e1443SSatish Balay       }
409cd0e1443SSatish Balay       else {
410cd0e1443SSatish Balay         value = v[k + l*m];
411cd0e1443SSatish Balay       }
412cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
4132c3acbe9SBarry Smith       while (high-low > 7) {
414cd0e1443SSatish Balay         t = (low+high)/2;
415cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
416cd0e1443SSatish Balay         else              low  = t;
417cd0e1443SSatish Balay       }
418cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
419cd0e1443SSatish Balay         if (rp[i] > bcol) break;
420cd0e1443SSatish Balay         if (rp[i] == bcol) {
4217e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
422cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
423cd0e1443SSatish Balay           else                  *bap  = value;
424f1241b54SBarry Smith           goto noinsert1;
425cd0e1443SSatish Balay         }
426cd0e1443SSatish Balay       }
427c2653b3dSLois Curfman McInnes       if (nonew == 1) goto noinsert1;
42811ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
429cd0e1443SSatish Balay       if (nrow >= rmax) {
430cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
431cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
432cd0e1443SSatish Balay         Scalar *new_a;
433cd0e1443SSatish Balay 
43411ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
43596854ed6SLois Curfman McInnes 
43696854ed6SLois Curfman McInnes         /* Malloc new storage space */
4377e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
438cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4397e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
440cd0e1443SSatish Balay         new_i   = new_j + new_nz;
441cd0e1443SSatish Balay 
442cd0e1443SSatish Balay         /* copy over old data into new slots */
443cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4447e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
445a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
446a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
447a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
448cd0e1443SSatish Balay                                                            len*sizeof(int));
449a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
450a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
451a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
452a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
453cd0e1443SSatish Balay         /* free up old matrix storage */
454cd0e1443SSatish Balay         PetscFree(a->a);
455cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
456cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
457cd0e1443SSatish Balay         a->singlemalloc = 1;
458cd0e1443SSatish Balay 
459a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
460cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4617e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
46218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
463cd0e1443SSatish Balay         a->reallocs++;
464119a934aSSatish Balay         a->nz++;
465cd0e1443SSatish Balay       }
4667e67e3f9SSatish Balay       N = nrow++ - 1;
467cd0e1443SSatish Balay       /* shift up all the later entries in this row */
468cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
469cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4707e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
471cd0e1443SSatish Balay       }
4727e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
473cd0e1443SSatish Balay       rp[i]                      = bcol;
4747e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
475f1241b54SBarry Smith       noinsert1:;
476cd0e1443SSatish Balay       low = i;
477cd0e1443SSatish Balay     }
478cd0e1443SSatish Balay     ailen[brow] = nrow;
479cd0e1443SSatish Balay   }
480cd0e1443SSatish Balay   return 0;
481cd0e1443SSatish Balay }
482cd0e1443SSatish Balay 
4835615d1e5SSatish Balay #undef __FUNC__
48405a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
48592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
48692c4ed94SBarry Smith {
48792c4ed94SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4888a84c255SSatish Balay   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
489dd9472c6SBarry Smith   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
490dd9472c6SBarry Smith   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
4910e324ae4SSatish Balay   Scalar      *ap,*value=v,*aa=a->a,*bap;
49292c4ed94SBarry Smith 
4930e324ae4SSatish Balay   if (roworiented) {
4940e324ae4SSatish Balay     stepval = (n-1)*bs;
4950e324ae4SSatish Balay   } else {
4960e324ae4SSatish Balay     stepval = (m-1)*bs;
4970e324ae4SSatish Balay   }
49892c4ed94SBarry Smith   for ( k=0; k<m; k++ ) { /* loop over added rows */
49992c4ed94SBarry Smith     row  = im[k];
50092c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
50192c4ed94SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
5028a84c255SSatish Balay     if (row >= a->mbs) SETERRQ(1,0,"Row too large");
50392c4ed94SBarry Smith #endif
50492c4ed94SBarry Smith     rp   = aj + ai[row];
50592c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
50692c4ed94SBarry Smith     rmax = imax[row];
50792c4ed94SBarry Smith     nrow = ailen[row];
50892c4ed94SBarry Smith     low  = 0;
50992c4ed94SBarry Smith     for ( l=0; l<n; l++ ) { /* loop over added columns */
51092c4ed94SBarry Smith #if defined(PETSC_BOPT_g)
51192c4ed94SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
5128a84c255SSatish Balay       if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large");
51392c4ed94SBarry Smith #endif
51492c4ed94SBarry Smith       col = in[l];
51592c4ed94SBarry Smith       if (roworiented) {
5160e324ae4SSatish Balay         value = v + k*(stepval+bs)*bs + l*bs;
5170e324ae4SSatish Balay       } else {
5180e324ae4SSatish Balay         value = v + l*(stepval+bs)*bs + k*bs;
51992c4ed94SBarry Smith       }
52092c4ed94SBarry Smith       if (!sorted) low = 0; high = nrow;
52192c4ed94SBarry Smith       while (high-low > 7) {
52292c4ed94SBarry Smith         t = (low+high)/2;
52392c4ed94SBarry Smith         if (rp[t] > col) high = t;
52492c4ed94SBarry Smith         else             low  = t;
52592c4ed94SBarry Smith       }
52692c4ed94SBarry Smith       for ( i=low; i<high; i++ ) {
52792c4ed94SBarry Smith         if (rp[i] > col) break;
52892c4ed94SBarry Smith         if (rp[i] == col) {
5298a84c255SSatish Balay           bap  = ap +  bs2*i;
5300e324ae4SSatish Balay           if (roworiented) {
5318a84c255SSatish Balay             if (is == ADD_VALUES) {
532dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
533dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5348a84c255SSatish Balay                   bap[jj] += *value++;
535dd9472c6SBarry Smith                 }
536dd9472c6SBarry Smith               }
5370e324ae4SSatish Balay             } else {
538dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
539dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs ) {
5400e324ae4SSatish Balay                   bap[jj] = *value++;
5418a84c255SSatish Balay                 }
542dd9472c6SBarry Smith               }
543dd9472c6SBarry Smith             }
5440e324ae4SSatish Balay           } else {
5450e324ae4SSatish Balay             if (is == ADD_VALUES) {
546dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
547dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5480e324ae4SSatish Balay                   *bap++ += *value++;
549dd9472c6SBarry Smith                 }
550dd9472c6SBarry Smith               }
5510e324ae4SSatish Balay             } else {
552dd9472c6SBarry Smith               for ( ii=0; ii<bs; ii++,value+=stepval ) {
553dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++ ) {
5540e324ae4SSatish Balay                   *bap++  = *value++;
5550e324ae4SSatish Balay                 }
5568a84c255SSatish Balay               }
557dd9472c6SBarry Smith             }
558dd9472c6SBarry Smith           }
559f1241b54SBarry Smith           goto noinsert2;
56092c4ed94SBarry Smith         }
56192c4ed94SBarry Smith       }
56289280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
56311ebbc71SLois Curfman McInnes       else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
56492c4ed94SBarry Smith       if (nrow >= rmax) {
56592c4ed94SBarry Smith         /* there is no extra room in row, therefore enlarge */
56692c4ed94SBarry Smith         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
56792c4ed94SBarry Smith         Scalar *new_a;
56892c4ed94SBarry Smith 
56911ebbc71SLois Curfman McInnes         if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix");
57089280ab3SLois Curfman McInnes 
57192c4ed94SBarry Smith         /* malloc new storage space */
57292c4ed94SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
57392c4ed94SBarry Smith         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
57492c4ed94SBarry Smith         new_j   = (int *) (new_a + bs2*new_nz);
57592c4ed94SBarry Smith         new_i   = new_j + new_nz;
57692c4ed94SBarry Smith 
57792c4ed94SBarry Smith         /* copy over old data into new slots */
57892c4ed94SBarry Smith         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
57992c4ed94SBarry Smith         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
58092c4ed94SBarry Smith         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
58192c4ed94SBarry Smith         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
582dd9472c6SBarry Smith         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
58392c4ed94SBarry Smith         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar));
58492c4ed94SBarry Smith         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
585dd9472c6SBarry Smith         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar));
58692c4ed94SBarry Smith         /* free up old matrix storage */
58792c4ed94SBarry Smith         PetscFree(a->a);
58892c4ed94SBarry Smith         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
58992c4ed94SBarry Smith         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
59092c4ed94SBarry Smith         a->singlemalloc = 1;
59192c4ed94SBarry Smith 
59292c4ed94SBarry Smith         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
59392c4ed94SBarry Smith         rmax = imax[row] = imax[row] + CHUNKSIZE;
59492c4ed94SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
59592c4ed94SBarry Smith         a->maxnz += bs2*CHUNKSIZE;
59692c4ed94SBarry Smith         a->reallocs++;
59792c4ed94SBarry Smith         a->nz++;
59892c4ed94SBarry Smith       }
59992c4ed94SBarry Smith       N = nrow++ - 1;
60092c4ed94SBarry Smith       /* shift up all the later entries in this row */
60192c4ed94SBarry Smith       for ( ii=N; ii>=i; ii-- ) {
60292c4ed94SBarry Smith         rp[ii+1] = rp[ii];
60392c4ed94SBarry Smith         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
60492c4ed94SBarry Smith       }
60592c4ed94SBarry Smith       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
60692c4ed94SBarry Smith       rp[i] = col;
6078a84c255SSatish Balay       bap   = ap +  bs2*i;
6080e324ae4SSatish Balay       if (roworiented) {
609dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval) {
610dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs ) {
6110e324ae4SSatish Balay             bap[jj] = *value++;
612dd9472c6SBarry Smith           }
613dd9472c6SBarry Smith         }
6140e324ae4SSatish Balay       } else {
615dd9472c6SBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval ) {
616dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++ ) {
6170e324ae4SSatish Balay             *bap++  = *value++;
6180e324ae4SSatish Balay           }
619dd9472c6SBarry Smith         }
620dd9472c6SBarry Smith       }
621f1241b54SBarry Smith       noinsert2:;
62292c4ed94SBarry Smith       low = i;
62392c4ed94SBarry Smith     }
62492c4ed94SBarry Smith     ailen[row] = nrow;
62592c4ed94SBarry Smith   }
62692c4ed94SBarry Smith   return 0;
62792c4ed94SBarry Smith }
62892c4ed94SBarry Smith 
62992c4ed94SBarry Smith #undef __FUNC__
630d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ"
6318f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
6320b824a48SSatish Balay {
6330b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
634*0752156aSBarry Smith   if (m) *m = a->m;
635*0752156aSBarry Smith   if (n) *n = a->n;
6360b824a48SSatish Balay   return 0;
6370b824a48SSatish Balay }
6380b824a48SSatish Balay 
6395615d1e5SSatish Balay #undef __FUNC__
640d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
6418f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
6420b824a48SSatish Balay {
6430b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6440b824a48SSatish Balay   *m = 0; *n = a->m;
6450b824a48SSatish Balay   return 0;
6460b824a48SSatish Balay }
6470b824a48SSatish Balay 
6485615d1e5SSatish Balay #undef __FUNC__
6495615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ"
6509867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6519867e207SSatish Balay {
6529867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6537e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
6549867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
6559867e207SSatish Balay 
6569867e207SSatish Balay   bs  = a->bs;
6579867e207SSatish Balay   ai  = a->i;
6589867e207SSatish Balay   aj  = a->j;
6599867e207SSatish Balay   aa  = a->a;
6609df24120SSatish Balay   bs2 = a->bs2;
6619867e207SSatish Balay 
662e3372554SBarry Smith   if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range");
6639867e207SSatish Balay 
6649867e207SSatish Balay   bn  = row/bs;   /* Block number */
6659867e207SSatish Balay   bp  = row % bs; /* Block Position */
6669867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
6679867e207SSatish Balay   *nz = bs*M;
6689867e207SSatish Balay 
6699867e207SSatish Balay   if (v) {
6709867e207SSatish Balay     *v = 0;
6719867e207SSatish Balay     if (*nz) {
6729867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
6739867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6749867e207SSatish Balay         v_i  = *v + i*bs;
6757e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
6767e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
6779867e207SSatish Balay       }
6789867e207SSatish Balay     }
6799867e207SSatish Balay   }
6809867e207SSatish Balay 
6819867e207SSatish Balay   if (idx) {
6829867e207SSatish Balay     *idx = 0;
6839867e207SSatish Balay     if (*nz) {
6849867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
6859867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
6869867e207SSatish Balay         idx_i = *idx + i*bs;
6879867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
6889867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
6899867e207SSatish Balay       }
6909867e207SSatish Balay     }
6919867e207SSatish Balay   }
6929867e207SSatish Balay   return 0;
6939867e207SSatish Balay }
6949867e207SSatish Balay 
6955615d1e5SSatish Balay #undef __FUNC__
696d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ"
6979867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
6989867e207SSatish Balay {
6999867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
7009867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
7019867e207SSatish Balay   return 0;
7029867e207SSatish Balay }
703b6490206SBarry Smith 
7045615d1e5SSatish Balay #undef __FUNC__
7055615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ"
7068f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B)
707f2501298SSatish Balay {
708f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
709f2501298SSatish Balay   Mat         C;
710f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
7119df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
712f2501298SSatish Balay   Scalar      *array=a->a;
713f2501298SSatish Balay 
714f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
715e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
716f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
717f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
718a7c10996SSatish Balay 
719a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
720f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
721f2501298SSatish Balay   PetscFree(col);
722f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
723f2501298SSatish Balay   cols = rows + bs;
724f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
725f2501298SSatish Balay     cols[0] = i*bs;
726f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
727f2501298SSatish Balay     len = ai[i+1] - ai[i];
728f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
729f2501298SSatish Balay       rows[0] = (*aj++)*bs;
730f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
731f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
732f2501298SSatish Balay       array += bs2;
733f2501298SSatish Balay     }
734f2501298SSatish Balay   }
7351073c447SSatish Balay   PetscFree(rows);
736f2501298SSatish Balay 
7376d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7386d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
739f2501298SSatish Balay 
740f2501298SSatish Balay   if (B != PETSC_NULL) {
741f2501298SSatish Balay     *B = C;
742f2501298SSatish Balay   } else {
743f2501298SSatish Balay     /* This isn't really an in-place transpose */
744f2501298SSatish Balay     PetscFree(a->a);
745f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
746f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
747f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
748f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
749f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
750f2501298SSatish Balay     PetscFree(a);
751f09e8eb9SSatish Balay     PetscMemcpy(A,C,sizeof(struct _p_Mat));
752f2501298SSatish Balay     PetscHeaderDestroy(C);
753f2501298SSatish Balay   }
754f2501298SSatish Balay   return 0;
755f2501298SSatish Balay }
756f2501298SSatish Balay 
757f2501298SSatish Balay 
7585615d1e5SSatish Balay #undef __FUNC__
7595615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
7608f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
761584200bdSSatish Balay {
762584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
763584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
764a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
76543ee02c3SBarry Smith   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
766584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
767584200bdSSatish Balay 
7686d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
769584200bdSSatish Balay 
77043ee02c3SBarry Smith   if (m) rmax = ailen[0];
771584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
772584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
773584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
774d402145bSBarry Smith     rmax   = PetscMax(rmax,ailen[i]);
775584200bdSSatish Balay     if (fshift) {
776a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
777584200bdSSatish Balay       N = ailen[i];
778584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
779584200bdSSatish Balay         ip[j-fshift] = ip[j];
7807e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
781584200bdSSatish Balay       }
782584200bdSSatish Balay     }
783584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
784584200bdSSatish Balay   }
785584200bdSSatish Balay   if (mbs) {
786584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
787584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
788584200bdSSatish Balay   }
789584200bdSSatish Balay   /* reset ilen and imax for each row */
790584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
791584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
792584200bdSSatish Balay   }
793a7c10996SSatish Balay   a->nz = ai[mbs];
794584200bdSSatish Balay 
795584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
796584200bdSSatish Balay   if (fshift && a->diag) {
797584200bdSSatish Balay     PetscFree(a->diag);
798584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
799584200bdSSatish Balay     a->diag = 0;
800584200bdSSatish Balay   }
8013d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
802219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
8033d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
804584200bdSSatish Balay            a->reallocs);
805d402145bSBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
806e2f3b5e9SSatish Balay   a->reallocs          = 0;
8074e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
8084e220ebcSLois Curfman McInnes 
809584200bdSSatish Balay   return 0;
810584200bdSSatish Balay }
811584200bdSSatish Balay 
8125615d1e5SSatish Balay #undef __FUNC__
8135615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ"
8148f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A)
8152593348eSBarry Smith {
816b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8179df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
8182593348eSBarry Smith   return 0;
8192593348eSBarry Smith }
8202593348eSBarry Smith 
8215615d1e5SSatish Balay #undef __FUNC__
822d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ"
823b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
8242593348eSBarry Smith {
8252593348eSBarry Smith   Mat         A  = (Mat) obj;
826b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8272593348eSBarry Smith 
8282593348eSBarry Smith #if defined(PETSC_LOG)
8292593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
8302593348eSBarry Smith #endif
8312593348eSBarry Smith   PetscFree(a->a);
8322593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
8332593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
8342593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
8352593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
8362593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
837de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
8382593348eSBarry Smith   PetscFree(a);
839f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
840f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
8412593348eSBarry Smith   return 0;
8422593348eSBarry Smith }
8432593348eSBarry Smith 
8445615d1e5SSatish Balay #undef __FUNC__
845d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ"
8468f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op)
8472593348eSBarry Smith {
848b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8496d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
8506d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
8516d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
852219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
8536d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
854c2653b3dSLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_LOCATION_ERROR)   a->nonew       = -1;
85596854ed6SLois Curfman McInnes   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew       = -2;
8566d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
8576d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
858219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
8596d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
8606d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
86190f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
8622b362799SSatish Balay            op == MAT_IGNORE_OFF_PROC_ENTRIES)
86394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
8646d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
865e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
8662593348eSBarry Smith   else
867e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
8682593348eSBarry Smith   return 0;
8692593348eSBarry Smith }
8702593348eSBarry Smith 
8712593348eSBarry Smith 
8722593348eSBarry Smith /* -------------------------------------------------------*/
8732593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
8742593348eSBarry Smith /* -------------------------------------------------------*/
875b6490206SBarry Smith #include "pinclude/plapack.h"
876b6490206SBarry Smith 
8775615d1e5SSatish Balay #undef __FUNC__
8785615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1"
87939b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
8802593348eSBarry Smith {
881b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
88239b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
883c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
8842593348eSBarry Smith 
885c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
886c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
887b6490206SBarry Smith 
888119a934aSSatish Balay   idx   = a->j;
889119a934aSSatish Balay   v     = a->a;
890119a934aSSatish Balay   ii    = a->i;
891119a934aSSatish Balay 
892119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
893119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
894119a934aSSatish Balay     sum  = 0.0;
895119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
8961a6a6d98SBarry Smith     z[i] = sum;
897119a934aSSatish Balay   }
898c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
899c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
90039b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
90139b95ed1SBarry Smith   return 0;
90239b95ed1SBarry Smith }
90339b95ed1SBarry Smith 
9045615d1e5SSatish Balay #undef __FUNC__
9055615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2"
90639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
90739b95ed1SBarry Smith {
90839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
90939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
91039b95ed1SBarry Smith   register Scalar x1,x2;
91139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
912c16cb8f2SBarry Smith   int             j,n;
91339b95ed1SBarry Smith 
914c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
915c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
91639b95ed1SBarry Smith 
91739b95ed1SBarry Smith   idx   = a->j;
91839b95ed1SBarry Smith   v     = a->a;
91939b95ed1SBarry Smith   ii    = a->i;
92039b95ed1SBarry Smith 
921119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
922119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
923119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
924119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
925119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
926119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
927119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
928119a934aSSatish Balay       v += 4;
929119a934aSSatish Balay     }
9301a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
931119a934aSSatish Balay     z += 2;
932119a934aSSatish Balay   }
933c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
934c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
93539b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
93639b95ed1SBarry Smith   return 0;
93739b95ed1SBarry Smith }
93839b95ed1SBarry Smith 
9395615d1e5SSatish Balay #undef __FUNC__
9405615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3"
94139b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
94239b95ed1SBarry Smith {
94339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
94439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
945c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
94639b95ed1SBarry Smith 
947c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
948c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
94939b95ed1SBarry Smith 
95039b95ed1SBarry Smith   idx   = a->j;
95139b95ed1SBarry Smith   v     = a->a;
95239b95ed1SBarry Smith   ii    = a->i;
95339b95ed1SBarry Smith 
954119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
955119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
956119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
957119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
958119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
959119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
960119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
961119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
962119a934aSSatish Balay       v += 9;
963119a934aSSatish Balay     }
9641a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
965119a934aSSatish Balay     z += 3;
966119a934aSSatish Balay   }
967c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
968c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
96939b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
97039b95ed1SBarry Smith   return 0;
97139b95ed1SBarry Smith }
97239b95ed1SBarry Smith 
9735615d1e5SSatish Balay #undef __FUNC__
9745615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4"
97539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
97639b95ed1SBarry Smith {
97739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
97839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
97939b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
98039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
981c16cb8f2SBarry Smith   int             j,n;
98239b95ed1SBarry Smith 
983c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
984c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
98539b95ed1SBarry Smith 
98639b95ed1SBarry Smith   idx   = a->j;
98739b95ed1SBarry Smith   v     = a->a;
98839b95ed1SBarry Smith   ii    = a->i;
98939b95ed1SBarry Smith 
990119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
991119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
992119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
993119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
994119a934aSSatish Balay       xb = x + 4*(*idx++);
995119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
996119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
997119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
998119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
999119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1000119a934aSSatish Balay       v += 16;
1001119a934aSSatish Balay     }
10021a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1003119a934aSSatish Balay     z += 4;
1004119a934aSSatish Balay   }
1005c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1006c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
100739b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
100839b95ed1SBarry Smith   return 0;
100939b95ed1SBarry Smith }
101039b95ed1SBarry Smith 
10115615d1e5SSatish Balay #undef __FUNC__
10125615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5"
101339b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
101439b95ed1SBarry Smith {
101539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
101639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
101739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
1018c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
101939b95ed1SBarry Smith 
1020c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1021c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
102239b95ed1SBarry Smith 
102339b95ed1SBarry Smith   idx   = a->j;
102439b95ed1SBarry Smith   v     = a->a;
102539b95ed1SBarry Smith   ii    = a->i;
102639b95ed1SBarry Smith 
1027119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1028119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
1029119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
1030119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1031119a934aSSatish Balay       xb = x + 5*(*idx++);
1032119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1033119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1034119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1035119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1036119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1037119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1038119a934aSSatish Balay       v += 25;
1039119a934aSSatish Balay     }
10401a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1041119a934aSSatish Balay     z += 5;
1042119a934aSSatish Balay   }
1043c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1044c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
104539b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
104639b95ed1SBarry Smith   return 0;
104739b95ed1SBarry Smith }
104839b95ed1SBarry Smith 
10495615d1e5SSatish Balay #undef __FUNC__
105048e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7"
105148e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
105248e9ddb2SSatish Balay {
105348e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
105448e9ddb2SSatish Balay   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
105548e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
105648e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
105748e9ddb2SSatish Balay 
105848e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
105948e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
106048e9ddb2SSatish Balay 
106148e9ddb2SSatish Balay   idx   = a->j;
106248e9ddb2SSatish Balay   v     = a->a;
106348e9ddb2SSatish Balay   ii    = a->i;
106448e9ddb2SSatish Balay 
106548e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
106648e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
106748e9ddb2SSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
106848e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
106948e9ddb2SSatish Balay       xb = x + 7*(*idx++);
107048e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
107148e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
107248e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
107348e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
107448e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
107548e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
107648e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
107748e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
107848e9ddb2SSatish Balay       v += 49;
107948e9ddb2SSatish Balay     }
108048e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
108148e9ddb2SSatish Balay     z += 7;
108248e9ddb2SSatish Balay   }
108348e9ddb2SSatish Balay 
108448e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
108548e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
108648e9ddb2SSatish Balay   PLogFlops(98*a->nz - a->m);
108748e9ddb2SSatish Balay   return 0;
108848e9ddb2SSatish Balay }
108948e9ddb2SSatish Balay 
109048e9ddb2SSatish Balay #undef __FUNC__
10915615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N"
109239b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
109339b95ed1SBarry Smith {
109439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
109539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
1096c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
1097c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
1098c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
109939b95ed1SBarry Smith 
1100c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1101c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
110239b95ed1SBarry Smith 
110339b95ed1SBarry Smith   idx   = a->j;
110439b95ed1SBarry Smith   v     = a->a;
110539b95ed1SBarry Smith   ii    = a->i;
110639b95ed1SBarry Smith 
110739b95ed1SBarry Smith 
1108119a934aSSatish Balay   if (!a->mult_work) {
11093b547af2SSatish Balay     k = PetscMax(a->m,a->n);
11102b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
1111119a934aSSatish Balay   }
1112119a934aSSatish Balay   work = a->mult_work;
1113119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
1114119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
1115119a934aSSatish Balay     ncols = n*bs;
1116119a934aSSatish Balay     workt = work;
1117119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
1118119a934aSSatish Balay       xb = x + bs*(*idx++);
1119119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1120119a934aSSatish Balay       workt += bs;
1121119a934aSSatish Balay     }
11221a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
1123119a934aSSatish Balay     v += n*bs2;
1124119a934aSSatish Balay     z += bs;
1125119a934aSSatish Balay   }
1126c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1127c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
11281a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
1129bb42667fSSatish Balay   return 0;
1130bb42667fSSatish Balay }
1131bb42667fSSatish Balay 
11325615d1e5SSatish Balay #undef __FUNC__
11335615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1"
1134f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1135f44d3a6dSSatish Balay {
1136f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1137f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
1138c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
1139f44d3a6dSSatish Balay 
1140c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1141c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1142c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1143f44d3a6dSSatish Balay 
1144f44d3a6dSSatish Balay   idx   = a->j;
1145f44d3a6dSSatish Balay   v     = a->a;
1146f44d3a6dSSatish Balay   ii    = a->i;
1147f44d3a6dSSatish Balay 
1148f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1149f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
1150f44d3a6dSSatish Balay     sum  = y[i];
1151f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
1152f44d3a6dSSatish Balay     z[i] = sum;
1153f44d3a6dSSatish Balay   }
1154c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1155c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1156c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1157f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
1158f44d3a6dSSatish Balay   return 0;
1159f44d3a6dSSatish Balay }
1160f44d3a6dSSatish Balay 
11615615d1e5SSatish Balay #undef __FUNC__
11625615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2"
1163f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1164f44d3a6dSSatish Balay {
1165f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1166f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
1167f44d3a6dSSatish Balay   register Scalar x1,x2;
1168f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1169c16cb8f2SBarry Smith   int             j,n;
1170f44d3a6dSSatish Balay 
1171c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1172c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1173c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1174f44d3a6dSSatish Balay 
1175f44d3a6dSSatish Balay   idx   = a->j;
1176f44d3a6dSSatish Balay   v     = a->a;
1177f44d3a6dSSatish Balay   ii    = a->i;
1178f44d3a6dSSatish Balay 
1179f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1180f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1181f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
1182f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1183f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1184f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
1185f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
1186f44d3a6dSSatish Balay       v += 4;
1187f44d3a6dSSatish Balay     }
1188f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
1189f44d3a6dSSatish Balay     z += 2; y += 2;
1190f44d3a6dSSatish Balay   }
1191c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1192c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1193c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1194f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
1195f44d3a6dSSatish Balay   return 0;
1196f44d3a6dSSatish Balay }
1197f44d3a6dSSatish Balay 
11985615d1e5SSatish Balay #undef __FUNC__
11995615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3"
1200f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1201f44d3a6dSSatish Balay {
1202f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1203f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
1204c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1205f44d3a6dSSatish Balay 
1206c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1207c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1208c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1209f44d3a6dSSatish Balay 
1210f44d3a6dSSatish Balay   idx   = a->j;
1211f44d3a6dSSatish Balay   v     = a->a;
1212f44d3a6dSSatish Balay   ii    = a->i;
1213f44d3a6dSSatish Balay 
1214f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1215f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1216f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1217f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1218f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1219f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1220f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1221f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1222f44d3a6dSSatish Balay       v += 9;
1223f44d3a6dSSatish Balay     }
1224f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1225f44d3a6dSSatish Balay     z += 3; y += 3;
1226f44d3a6dSSatish Balay   }
1227c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1228c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1229c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1230f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
1231f44d3a6dSSatish Balay   return 0;
1232f44d3a6dSSatish Balay }
1233f44d3a6dSSatish Balay 
12345615d1e5SSatish Balay #undef __FUNC__
12355615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4"
1236f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1237f44d3a6dSSatish Balay {
1238f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1239f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
1240f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
1241f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
1242c16cb8f2SBarry Smith   int             j,n;
1243f44d3a6dSSatish Balay 
1244c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1245c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1246c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1247f44d3a6dSSatish Balay 
1248f44d3a6dSSatish Balay   idx   = a->j;
1249f44d3a6dSSatish Balay   v     = a->a;
1250f44d3a6dSSatish Balay   ii    = a->i;
1251f44d3a6dSSatish Balay 
1252f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1253f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1254f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1255f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1256f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1257f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1258f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1259f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1260f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1261f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1262f44d3a6dSSatish Balay       v += 16;
1263f44d3a6dSSatish Balay     }
1264f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1265f44d3a6dSSatish Balay     z += 4; y += 4;
1266f44d3a6dSSatish Balay   }
1267c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1268c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1269c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1270f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1271f44d3a6dSSatish Balay   return 0;
1272f44d3a6dSSatish Balay }
1273f44d3a6dSSatish Balay 
12745615d1e5SSatish Balay #undef __FUNC__
12755615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5"
1276f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1277f44d3a6dSSatish Balay {
1278f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1279f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1280f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1281c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1282f44d3a6dSSatish Balay 
1283c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1284c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1285c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1286f44d3a6dSSatish Balay 
1287f44d3a6dSSatish Balay   idx   = a->j;
1288f44d3a6dSSatish Balay   v     = a->a;
1289f44d3a6dSSatish Balay   ii    = a->i;
1290f44d3a6dSSatish Balay 
1291f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1292f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1293f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1294f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1295f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1296f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1297f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1298f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1299f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1300f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1301f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1302f44d3a6dSSatish Balay       v += 25;
1303f44d3a6dSSatish Balay     }
1304f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1305f44d3a6dSSatish Balay     z += 5; y += 5;
1306f44d3a6dSSatish Balay   }
1307c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1308c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1309c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1310f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1311f44d3a6dSSatish Balay   return 0;
1312f44d3a6dSSatish Balay }
1313f44d3a6dSSatish Balay 
13145615d1e5SSatish Balay #undef __FUNC__
131548e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7"
131648e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
131748e9ddb2SSatish Balay {
131848e9ddb2SSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
131948e9ddb2SSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
132048e9ddb2SSatish Balay   register Scalar x1,x2,x3,x4,x5,x6,x7;
132148e9ddb2SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n;
132248e9ddb2SSatish Balay 
132348e9ddb2SSatish Balay   VecGetArray_Fast(xx,x);
132448e9ddb2SSatish Balay   VecGetArray_Fast(yy,y);
132548e9ddb2SSatish Balay   VecGetArray_Fast(zz,z);
132648e9ddb2SSatish Balay 
132748e9ddb2SSatish Balay   idx   = a->j;
132848e9ddb2SSatish Balay   v     = a->a;
132948e9ddb2SSatish Balay   ii    = a->i;
133048e9ddb2SSatish Balay 
133148e9ddb2SSatish Balay   for ( i=0; i<mbs; i++ ) {
133248e9ddb2SSatish Balay     n  = ii[1] - ii[0]; ii++;
133348e9ddb2SSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
133448e9ddb2SSatish Balay     for ( j=0; j<n; j++ ) {
133548e9ddb2SSatish Balay       xb = x + 7*(*idx++);
133648e9ddb2SSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
133748e9ddb2SSatish Balay       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
133848e9ddb2SSatish Balay       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
133948e9ddb2SSatish Balay       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
134048e9ddb2SSatish Balay       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
134148e9ddb2SSatish Balay       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
134248e9ddb2SSatish Balay       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
134348e9ddb2SSatish Balay       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
134448e9ddb2SSatish Balay       v += 49;
134548e9ddb2SSatish Balay     }
134648e9ddb2SSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
134748e9ddb2SSatish Balay     z += 7; y += 7;
134848e9ddb2SSatish Balay   }
134948e9ddb2SSatish Balay   VecRestoreArray_Fast(xx,x);
135048e9ddb2SSatish Balay   VecRestoreArray_Fast(yy,y);
135148e9ddb2SSatish Balay   VecRestoreArray_Fast(zz,z);
135248e9ddb2SSatish Balay   PLogFlops(98*a->nz);
135348e9ddb2SSatish Balay   return 0;
135448e9ddb2SSatish Balay }
135548e9ddb2SSatish Balay 
135648e9ddb2SSatish Balay 
135748e9ddb2SSatish Balay #undef __FUNC__
13585615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N"
1359f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1360f44d3a6dSSatish Balay {
1361f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1362f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1363f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1364f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1365f44d3a6dSSatish Balay 
1366f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1367f44d3a6dSSatish Balay 
1368c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1369c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1370f44d3a6dSSatish Balay 
1371f44d3a6dSSatish Balay   idx   = a->j;
1372f44d3a6dSSatish Balay   v     = a->a;
1373f44d3a6dSSatish Balay   ii    = a->i;
1374f44d3a6dSSatish Balay 
1375f44d3a6dSSatish Balay 
1376f44d3a6dSSatish Balay   if (!a->mult_work) {
1377f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1378f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1379f44d3a6dSSatish Balay   }
1380f44d3a6dSSatish Balay   work = a->mult_work;
1381f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1382f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1383f44d3a6dSSatish Balay     ncols = n*bs;
1384f44d3a6dSSatish Balay     workt = work;
1385f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1386f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1387f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1388f44d3a6dSSatish Balay       workt += bs;
1389f44d3a6dSSatish Balay     }
1390f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1391f44d3a6dSSatish Balay     v += n*bs2;
1392f44d3a6dSSatish Balay     z += bs;
1393f44d3a6dSSatish Balay   }
1394c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1395c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1396f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1397f44d3a6dSSatish Balay   return 0;
1398f44d3a6dSSatish Balay }
1399f44d3a6dSSatish Balay 
14005615d1e5SSatish Balay #undef __FUNC__
14015615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ"
14028f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1403bb42667fSSatish Balay {
1404bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
14051a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1406bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1407bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
14089df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1409119a934aSSatish Balay 
1410119a934aSSatish Balay 
141190f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
141290f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1413bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1414bb42667fSSatish Balay 
1415119a934aSSatish Balay   idx   = a->j;
1416119a934aSSatish Balay   v     = a->a;
1417119a934aSSatish Balay   ii    = a->i;
1418119a934aSSatish Balay 
1419119a934aSSatish Balay   switch (bs) {
1420119a934aSSatish Balay   case 1:
1421119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1422119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1423119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1424119a934aSSatish Balay       ib = idx + ai[i];
1425119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1426bb1453f0SSatish Balay         rval    = ib[j];
1427bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1428119a934aSSatish Balay       }
1429119a934aSSatish Balay     }
1430119a934aSSatish Balay     break;
1431119a934aSSatish Balay   case 2:
1432119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1433119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1434119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1435119a934aSSatish Balay       ib = idx + ai[i];
1436119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1437119a934aSSatish Balay         rval      = ib[j]*2;
1438bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1439bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1440119a934aSSatish Balay         v += 4;
1441119a934aSSatish Balay       }
1442119a934aSSatish Balay     }
1443119a934aSSatish Balay     break;
1444119a934aSSatish Balay   case 3:
1445119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1446119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1447119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1448119a934aSSatish Balay       ib = idx + ai[i];
1449119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1450119a934aSSatish Balay         rval      = ib[j]*3;
1451bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1452bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1453bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1454119a934aSSatish Balay         v += 9;
1455119a934aSSatish Balay       }
1456119a934aSSatish Balay     }
1457119a934aSSatish Balay     break;
1458119a934aSSatish Balay   case 4:
1459119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1460119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1461119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1462119a934aSSatish Balay       ib = idx + ai[i];
1463119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1464119a934aSSatish Balay         rval      = ib[j]*4;
1465bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1466bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1467bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1468bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1469119a934aSSatish Balay         v += 16;
1470119a934aSSatish Balay       }
1471119a934aSSatish Balay     }
1472119a934aSSatish Balay     break;
1473119a934aSSatish Balay   case 5:
1474119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1475119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1476119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1477119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1478119a934aSSatish Balay       ib = idx + ai[i];
1479119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1480119a934aSSatish Balay         rval      = ib[j]*5;
1481bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1482bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1483bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1484bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1485bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1486119a934aSSatish Balay         v += 25;
1487119a934aSSatish Balay       }
1488119a934aSSatish Balay     }
1489119a934aSSatish Balay     break;
1490119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1491119a934aSSatish Balay     default: {
1492119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1493119a934aSSatish Balay       if (!a->mult_work) {
14943b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1495bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1496119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1497119a934aSSatish Balay       }
1498119a934aSSatish Balay       work = a->mult_work;
1499119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1500119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1501119a934aSSatish Balay         ncols = n*bs;
1502119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1503119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1504119a934aSSatish Balay         v += n*bs2;
1505119a934aSSatish Balay         x += bs;
1506119a934aSSatish Balay         workt = work;
1507119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1508119a934aSSatish Balay           zb = z + bs*(*idx++);
1509bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1510119a934aSSatish Balay           workt += bs;
1511119a934aSSatish Balay         }
1512119a934aSSatish Balay       }
1513119a934aSSatish Balay     }
1514119a934aSSatish Balay   }
15150419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
15160419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1517faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1518faf6580fSSatish Balay   return 0;
1519faf6580fSSatish Balay }
15201c351548SSatish Balay 
15215615d1e5SSatish Balay #undef __FUNC__
15225615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ"
15238f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1524faf6580fSSatish Balay {
1525faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1526faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1527faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1528faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1529faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1530faf6580fSSatish Balay 
1531faf6580fSSatish Balay 
1532faf6580fSSatish Balay 
153390f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
153490f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1535faf6580fSSatish Balay 
1536648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1537648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1538648c1d95SSatish Balay 
1539faf6580fSSatish Balay 
1540faf6580fSSatish Balay   idx   = a->j;
1541faf6580fSSatish Balay   v     = a->a;
1542faf6580fSSatish Balay   ii    = a->i;
1543faf6580fSSatish Balay 
1544faf6580fSSatish Balay   switch (bs) {
1545faf6580fSSatish Balay   case 1:
1546faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1547faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1548faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1549faf6580fSSatish Balay       ib = idx + ai[i];
1550faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1551faf6580fSSatish Balay         rval    = ib[j];
1552faf6580fSSatish Balay         z[rval] += *v++ * x1;
1553faf6580fSSatish Balay       }
1554faf6580fSSatish Balay     }
1555faf6580fSSatish Balay     break;
1556faf6580fSSatish Balay   case 2:
1557faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1558faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1559faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1560faf6580fSSatish Balay       ib = idx + ai[i];
1561faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1562faf6580fSSatish Balay         rval      = ib[j]*2;
1563faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1564faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1565faf6580fSSatish Balay         v += 4;
1566faf6580fSSatish Balay       }
1567faf6580fSSatish Balay     }
1568faf6580fSSatish Balay     break;
1569faf6580fSSatish Balay   case 3:
1570faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1571faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1572faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1573faf6580fSSatish Balay       ib = idx + ai[i];
1574faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1575faf6580fSSatish Balay         rval      = ib[j]*3;
1576faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1577faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1578faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1579faf6580fSSatish Balay         v += 9;
1580faf6580fSSatish Balay       }
1581faf6580fSSatish Balay     }
1582faf6580fSSatish Balay     break;
1583faf6580fSSatish Balay   case 4:
1584faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1585faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1586faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1587faf6580fSSatish Balay       ib = idx + ai[i];
1588faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1589faf6580fSSatish Balay         rval      = ib[j]*4;
1590faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1591faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1592faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1593faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1594faf6580fSSatish Balay         v += 16;
1595faf6580fSSatish Balay       }
1596faf6580fSSatish Balay     }
1597faf6580fSSatish Balay     break;
1598faf6580fSSatish Balay   case 5:
1599faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1600faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1601faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1602faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1603faf6580fSSatish Balay       ib = idx + ai[i];
1604faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1605faf6580fSSatish Balay         rval      = ib[j]*5;
1606faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1607faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1608faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1609faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1610faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1611faf6580fSSatish Balay         v += 25;
1612faf6580fSSatish Balay       }
1613faf6580fSSatish Balay     }
1614faf6580fSSatish Balay     break;
1615faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1616faf6580fSSatish Balay     default: {
1617faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1618faf6580fSSatish Balay       if (!a->mult_work) {
1619faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1620faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1621faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1622faf6580fSSatish Balay       }
1623faf6580fSSatish Balay       work = a->mult_work;
1624faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1625faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1626faf6580fSSatish Balay         ncols = n*bs;
1627faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1628faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1629faf6580fSSatish Balay         v += n*bs2;
1630faf6580fSSatish Balay         x += bs;
1631faf6580fSSatish Balay         workt = work;
1632faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1633faf6580fSSatish Balay           zb = z + bs*(*idx++);
1634faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1635faf6580fSSatish Balay           workt += bs;
1636faf6580fSSatish Balay         }
1637faf6580fSSatish Balay       }
1638faf6580fSSatish Balay     }
1639faf6580fSSatish Balay   }
1640faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1641faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1642faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
16432593348eSBarry Smith   return 0;
16442593348eSBarry Smith }
16452593348eSBarry Smith 
16465615d1e5SSatish Balay #undef __FUNC__
1647d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ"
16488f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
16492593348eSBarry Smith {
1650b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
16514e220ebcSLois Curfman McInnes 
16524e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
16534e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
16544e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
16554e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
16564e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
16574e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
16584e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
16594e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
16604e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
16614e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
16624e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
16634e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
16644e220ebcSLois Curfman McInnes   info->memory       = A->mem;
16654e220ebcSLois Curfman McInnes   if (A->factor) {
16664e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
16674e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
16684e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
16694e220ebcSLois Curfman McInnes   } else {
16704e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
16714e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
16724e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
16734e220ebcSLois Curfman McInnes   }
16742593348eSBarry Smith   return 0;
16752593348eSBarry Smith }
16762593348eSBarry Smith 
16775615d1e5SSatish Balay #undef __FUNC__
16785615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ"
16798f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
168091d316f6SSatish Balay {
168191d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
168291d316f6SSatish Balay 
1683e3372554SBarry Smith   if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type");
168491d316f6SSatish Balay 
168591d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
168691d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1687a7c10996SSatish Balay       (a->nz != b->nz)) {
168891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
168991d316f6SSatish Balay   }
169091d316f6SSatish Balay 
169191d316f6SSatish Balay   /* if the a->i are the same */
169291d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
169391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169491d316f6SSatish Balay   }
169591d316f6SSatish Balay 
169691d316f6SSatish Balay   /* if a->j are the same */
169791d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
169891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
169991d316f6SSatish Balay   }
170091d316f6SSatish Balay 
170191d316f6SSatish Balay   /* if a->a are the same */
170291d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
170391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
170491d316f6SSatish Balay   }
170591d316f6SSatish Balay   *flg = PETSC_TRUE;
170691d316f6SSatish Balay   return 0;
170791d316f6SSatish Balay 
170891d316f6SSatish Balay }
170991d316f6SSatish Balay 
17105615d1e5SSatish Balay #undef __FUNC__
17115615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ"
17128f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
171391d316f6SSatish Balay {
171491d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17157e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
171617e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
171717e48fc4SSatish Balay 
17180513a670SBarry Smith   if (A->factor) SETERRQ(1,0,"Not for factored matrix");
171917e48fc4SSatish Balay   bs   = a->bs;
172017e48fc4SSatish Balay   aa   = a->a;
172117e48fc4SSatish Balay   ai   = a->i;
172217e48fc4SSatish Balay   aj   = a->j;
172317e48fc4SSatish Balay   ambs = a->mbs;
17249df24120SSatish Balay   bs2  = a->bs2;
172591d316f6SSatish Balay 
172691d316f6SSatish Balay   VecSet(&zero,v);
172790f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1728e3372554SBarry Smith   if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector");
172917e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
173017e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
173117e48fc4SSatish Balay       if (aj[j] == i) {
173217e48fc4SSatish Balay         row  = i*bs;
17337e67e3f9SSatish Balay         aa_j = aa+j*bs2;
17347e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
173591d316f6SSatish Balay         break;
173691d316f6SSatish Balay       }
173791d316f6SSatish Balay     }
173891d316f6SSatish Balay   }
173991d316f6SSatish Balay   return 0;
174091d316f6SSatish Balay }
174191d316f6SSatish Balay 
17425615d1e5SSatish Balay #undef __FUNC__
17435615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ"
17448f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
17459867e207SSatish Balay {
17469867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
17479867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
17487e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
17499867e207SSatish Balay 
17509867e207SSatish Balay   ai  = a->i;
17519867e207SSatish Balay   aj  = a->j;
17529867e207SSatish Balay   aa  = a->a;
17539867e207SSatish Balay   m   = a->m;
17549867e207SSatish Balay   n   = a->n;
17559867e207SSatish Balay   bs  = a->bs;
17569867e207SSatish Balay   mbs = a->mbs;
17579df24120SSatish Balay   bs2 = a->bs2;
17589867e207SSatish Balay   if (ll) {
175990f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1760e3372554SBarry Smith     if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length");
17619867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17629867e207SSatish Balay       M  = ai[i+1] - ai[i];
17639867e207SSatish Balay       li = l + i*bs;
17647e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17659867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17667e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
17679867e207SSatish Balay           (*v++) *= li[k%bs];
17689867e207SSatish Balay         }
17699867e207SSatish Balay       }
17709867e207SSatish Balay     }
17719867e207SSatish Balay   }
17729867e207SSatish Balay 
17739867e207SSatish Balay   if (rr) {
177490f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1775e3372554SBarry Smith     if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length");
17769867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
17779867e207SSatish Balay       M  = ai[i+1] - ai[i];
17787e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
17799867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
17809867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
17819867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
17829867e207SSatish Balay           x = ri[k];
17839867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
17849867e207SSatish Balay         }
17859867e207SSatish Balay       }
17869867e207SSatish Balay     }
17879867e207SSatish Balay   }
17889867e207SSatish Balay   return 0;
17899867e207SSatish Balay }
17909867e207SSatish Balay 
17919867e207SSatish Balay 
1792b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1793b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
17942a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1795736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1796736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
17971a6a6d98SBarry Smith 
17981a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
18031a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
180448e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
18051a6a6d98SBarry Smith 
18067fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
18117fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
18122593348eSBarry Smith 
18135615d1e5SSatish Balay #undef __FUNC__
18145615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ"
18158f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
18162593348eSBarry Smith {
1817b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18182593348eSBarry Smith   Scalar      *v = a->a;
18192593348eSBarry Smith   double      sum = 0.0;
18209df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
18212593348eSBarry Smith 
18222593348eSBarry Smith   if (type == NORM_FROBENIUS) {
18230419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
18242593348eSBarry Smith #if defined(PETSC_COMPLEX)
18252593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
18262593348eSBarry Smith #else
18272593348eSBarry Smith       sum += (*v)*(*v); v++;
18282593348eSBarry Smith #endif
18292593348eSBarry Smith     }
18302593348eSBarry Smith     *norm = sqrt(sum);
18312593348eSBarry Smith   }
18322593348eSBarry Smith   else {
1833e3372554SBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
18342593348eSBarry Smith   }
18352593348eSBarry Smith   return 0;
18362593348eSBarry Smith }
18372593348eSBarry Smith 
18383eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
18392593348eSBarry Smith /*
18402593348eSBarry Smith      note: This can only work for identity for row and col. It would
18412593348eSBarry Smith    be good to check this and otherwise generate an error.
18422593348eSBarry Smith */
18435615d1e5SSatish Balay #undef __FUNC__
18445615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ"
18458f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
18462593348eSBarry Smith {
1847b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18482593348eSBarry Smith   Mat         outA;
1849de6a44a3SBarry Smith   int         ierr;
18502593348eSBarry Smith 
1851e3372554SBarry Smith   if (fill != 0) SETERRQ(1,0,"Only fill=0 supported");
18522593348eSBarry Smith 
18532593348eSBarry Smith   outA          = inA;
18542593348eSBarry Smith   inA->factor   = FACTOR_LU;
18552593348eSBarry Smith   a->row        = row;
18562593348eSBarry Smith   a->col        = col;
18572593348eSBarry Smith 
1858eed86810SBarry Smith   if (!a->solve_work) {
1859de6a44a3SBarry Smith     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1860eed86810SBarry Smith     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1861eed86810SBarry Smith   }
18622593348eSBarry Smith 
18632593348eSBarry Smith   if (!a->diag) {
1864b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
18652593348eSBarry Smith   }
18667fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
18673eee16b0SBarry Smith 
18683eee16b0SBarry Smith   /*
18693eee16b0SBarry Smith       Blocksize 4 has a special faster solver for ILU(0) factorization
18703eee16b0SBarry Smith     with natural ordering
18713eee16b0SBarry Smith   */
18723eee16b0SBarry Smith   if (a->bs == 4) {
18733eee16b0SBarry Smith     inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
18743eee16b0SBarry Smith   }
18753eee16b0SBarry Smith 
18762593348eSBarry Smith   return 0;
18772593348eSBarry Smith }
18782593348eSBarry Smith 
18795615d1e5SSatish Balay #undef __FUNC__
18805615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ"
18818f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
18822593348eSBarry Smith {
1883b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
18849df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1885b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1886b6490206SBarry Smith   PLogFlops(totalnz);
18872593348eSBarry Smith   return 0;
18882593348eSBarry Smith }
18892593348eSBarry Smith 
18905615d1e5SSatish Balay #undef __FUNC__
18915615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ"
18928f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
18937e67e3f9SSatish Balay {
18947e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
18957e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1896a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
18979df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
18987e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
18997e67e3f9SSatish Balay 
19007e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
19017e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
1902e3372554SBarry Smith     if (row < 0) SETERRQ(1,0,"Negative row");
1903e3372554SBarry Smith     if (row >= a->m) SETERRQ(1,0,"Row too large");
1904a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
19057e67e3f9SSatish Balay     nrow = ailen[brow];
19067e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
1907e3372554SBarry Smith       if (in[l] < 0) SETERRQ(1,0,"Negative column");
1908e3372554SBarry Smith       if (in[l] >= a->n) SETERRQ(1,0,"Column too large");
1909a7c10996SSatish Balay       col  = in[l] ;
19107e67e3f9SSatish Balay       bcol = col/bs;
19117e67e3f9SSatish Balay       cidx = col%bs;
19127e67e3f9SSatish Balay       ridx = row%bs;
19137e67e3f9SSatish Balay       high = nrow;
19147e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
19157e67e3f9SSatish Balay       while (high-low > 5) {
19167e67e3f9SSatish Balay         t = (low+high)/2;
19177e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
19187e67e3f9SSatish Balay         else             low  = t;
19197e67e3f9SSatish Balay       }
19207e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
19217e67e3f9SSatish Balay         if (rp[i] > bcol) break;
19227e67e3f9SSatish Balay         if (rp[i] == bcol) {
19237e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
19247e67e3f9SSatish Balay           goto finished;
19257e67e3f9SSatish Balay         }
19267e67e3f9SSatish Balay       }
19277e67e3f9SSatish Balay       *v++ = zero;
19287e67e3f9SSatish Balay       finished:;
19297e67e3f9SSatish Balay     }
19307e67e3f9SSatish Balay   }
19317e67e3f9SSatish Balay   return 0;
19327e67e3f9SSatish Balay }
19337e67e3f9SSatish Balay 
19345615d1e5SSatish Balay #undef __FUNC__
1935d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
19368f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
19375a838052SSatish Balay {
19385a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
19395a838052SSatish Balay   *bs = baij->bs;
19405a838052SSatish Balay   return 0;
19415a838052SSatish Balay }
19425a838052SSatish Balay 
1943d9b7c43dSSatish Balay /* idx should be of length atlease bs */
19445615d1e5SSatish Balay #undef __FUNC__
19455615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block"
1946d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1947d9b7c43dSSatish Balay {
1948d9b7c43dSSatish Balay   int i,row;
1949d9b7c43dSSatish Balay   row = idx[0];
1950d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1951d9b7c43dSSatish Balay 
1952d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1953d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1954d9b7c43dSSatish Balay   }
1955d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1956d9b7c43dSSatish Balay   return 0;
1957d9b7c43dSSatish Balay }
1958d9b7c43dSSatish Balay 
19595615d1e5SSatish Balay #undef __FUNC__
19605615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ"
19618f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1962d9b7c43dSSatish Balay {
1963d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1964d9b7c43dSSatish Balay   IS          is_local;
1965d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1966d9b7c43dSSatish Balay   PetscTruth  flg;
1967d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1968d9b7c43dSSatish Balay 
1969d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1970d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1971d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1972537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1973d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1974d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1975d9b7c43dSSatish Balay 
1976d9b7c43dSSatish Balay   i = 0;
1977d9b7c43dSSatish Balay   while (i < is_n) {
1978e3372554SBarry Smith     if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range");
1979d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1980d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1981d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1982d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1983d9b7c43dSSatish Balay       i += bs;
1984d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1985d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1986d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1987d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1988d9b7c43dSSatish Balay         aa[0] = zero;
1989d9b7c43dSSatish Balay         aa+=bs;
1990d9b7c43dSSatish Balay       }
1991d9b7c43dSSatish Balay       i++;
1992d9b7c43dSSatish Balay     }
1993d9b7c43dSSatish Balay   }
1994d9b7c43dSSatish Balay   if (diag) {
1995d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1996d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1997d9b7c43dSSatish Balay     }
1998d9b7c43dSSatish Balay   }
1999d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
2000d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
2001d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
20029a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2003d9b7c43dSSatish Balay 
2004d9b7c43dSSatish Balay   return 0;
2005d9b7c43dSSatish Balay }
20061c351548SSatish Balay 
20075615d1e5SSatish Balay #undef __FUNC__
2008d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ"
2009ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
2010ba4ca20aSSatish Balay {
2011ba4ca20aSSatish Balay   static int called = 0;
2012ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
2013ba4ca20aSSatish Balay 
2014ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
2015ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
2016ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
2017ba4ca20aSSatish Balay   return 0;
2018ba4ca20aSSatish Balay }
2019d9b7c43dSSatish Balay 
20202593348eSBarry Smith /* -------------------------------------------------------------------*/
2021cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
20229867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
2023f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
2024faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
20251a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
2026b6490206SBarry Smith        0,0,
2027de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
2028b6490206SBarry Smith        0,
2029f2501298SSatish Balay        MatTranspose_SeqBAIJ,
203017e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
20319867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
2032584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
2033b6490206SBarry Smith        0,
2034d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
20357fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
2036b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
2037de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
2038d402145bSBarry Smith        0,0,
2039b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
2040b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
2041af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
20427e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
2043ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
20443b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
20453b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
204692c4ed94SBarry Smith        MatRestoreRowIJ_SeqBAIJ,
204792c4ed94SBarry Smith        0,0,0,0,0,0,
204892c4ed94SBarry Smith        MatSetValuesBlocked_SeqBAIJ};
20492593348eSBarry Smith 
20505615d1e5SSatish Balay #undef __FUNC__
20515615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ"
20522593348eSBarry Smith /*@C
205344cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
205444cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
205544cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
20562bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
20572bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
20582593348eSBarry Smith 
20592593348eSBarry Smith    Input Parameters:
2060029af93fSBarry Smith .  comm - MPI communicator, set to PETSC_COMM_SELF
2061b6490206SBarry Smith .  bs - size of block
20622593348eSBarry Smith .  m - number of rows
20632593348eSBarry Smith .  n - number of columns
2064b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
20652bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
20662bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
20672593348eSBarry Smith 
20682593348eSBarry Smith    Output Parameter:
20692593348eSBarry Smith .  A - the matrix
20702593348eSBarry Smith 
20710513a670SBarry Smith    Options Database Keys:
20720513a670SBarry Smith $    -mat_no_unroll - uses code that does not unroll the loops in the
20730effe34fSLois Curfman McInnes $                     block calculations (much slower)
20740513a670SBarry Smith $    -mat_block_size - size of the blocks to use
20750513a670SBarry Smith 
20762593348eSBarry Smith    Notes:
207744cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
20782593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
207944cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
20802593348eSBarry Smith 
20812593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
20822593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
20832593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
20846da5968aSLois Curfman McInnes    matrices.
20852593348eSBarry Smith 
208644cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
20872593348eSBarry Smith @*/
2088b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
20892593348eSBarry Smith {
20902593348eSBarry Smith   Mat         B;
2091b6490206SBarry Smith   Mat_SeqBAIJ *b;
20923b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
20933b2fbd54SBarry Smith 
20943b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
2095e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"Comm must be of size 1");
2096b6490206SBarry Smith 
20970513a670SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
20980513a670SBarry Smith 
2099f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
2100e3372554SBarry Smith     SETERRQ(1,0,"Number rows, cols must be divisible by blocksize");
21012593348eSBarry Smith 
21022593348eSBarry Smith   *A = 0;
2103d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView);
21042593348eSBarry Smith   PLogObjectCreate(B);
2105b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
210644cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
21072593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
21081a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
21091a6a6d98SBarry Smith   if (!flg) {
21107fc0212eSBarry Smith     switch (bs) {
21117fc0212eSBarry Smith     case 1:
21127fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21131a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_1;
211439b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_1;
2115f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
21167fc0212eSBarry Smith       break;
21174eeb42bcSBarry Smith     case 2:
21184eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
21191a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_2;
212039b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_2;
2121f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
21226d84be18SBarry Smith       break;
2123f327dd97SBarry Smith     case 3:
2124f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
21251a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_3;
212639b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_3;
2127f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
21284eeb42bcSBarry Smith       break;
21296d84be18SBarry Smith     case 4:
21306d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
21311a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_4;
213239b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_4;
2133f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
21346d84be18SBarry Smith       break;
21356d84be18SBarry Smith     case 5:
21366d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
21371a6a6d98SBarry Smith       B->ops.solve           = MatSolve_SeqBAIJ_5;
213839b95ed1SBarry Smith       B->ops.mult            = MatMult_SeqBAIJ_5;
2139f44d3a6dSSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
21406d84be18SBarry Smith       break;
214148e9ddb2SSatish Balay     case 7:
214248e9ddb2SSatish Balay       B->ops.mult            = MatMult_SeqBAIJ_7;
214348e9ddb2SSatish Balay       B->ops.solve           = MatSolve_SeqBAIJ_7;
214448e9ddb2SSatish Balay       B->ops.multadd         = MatMultAdd_SeqBAIJ_7;
214548e9ddb2SSatish Balay       break;
21467fc0212eSBarry Smith     }
21471a6a6d98SBarry Smith   }
2148b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
2149b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
21502593348eSBarry Smith   B->factor           = 0;
21512593348eSBarry Smith   B->lupivotthreshold = 1.0;
215290f02eecSBarry Smith   B->mapping          = 0;
21532593348eSBarry Smith   b->row              = 0;
21542593348eSBarry Smith   b->col              = 0;
21552593348eSBarry Smith   b->reallocs         = 0;
21562593348eSBarry Smith 
215744cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
215844cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
2159b6490206SBarry Smith   b->mbs     = mbs;
2160f2501298SSatish Balay   b->nbs     = nbs;
2161b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
21622593348eSBarry Smith   if (nnz == PETSC_NULL) {
2163119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
21642593348eSBarry Smith     else if (nz <= 0)        nz = 1;
2165b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
2166b6490206SBarry Smith     nz = nz*mbs;
21672593348eSBarry Smith   }
21682593348eSBarry Smith   else {
21692593348eSBarry Smith     nz = 0;
2170b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
21712593348eSBarry Smith   }
21722593348eSBarry Smith 
21732593348eSBarry Smith   /* allocate the matrix space */
21747e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
21752593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
21767e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
21777e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
21782593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
21792593348eSBarry Smith   b->i  = b->j + nz;
21802593348eSBarry Smith   b->singlemalloc = 1;
21812593348eSBarry Smith 
2182de6a44a3SBarry Smith   b->i[0] = 0;
2183b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
21842593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
21852593348eSBarry Smith   }
21862593348eSBarry Smith 
2187b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
2188b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
2189f09e8eb9SSatish Balay   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2190b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
21912593348eSBarry Smith 
2192b6490206SBarry Smith   b->bs               = bs;
21939df24120SSatish Balay   b->bs2              = bs2;
2194b6490206SBarry Smith   b->mbs              = mbs;
21952593348eSBarry Smith   b->nz               = 0;
219618e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
21972593348eSBarry Smith   b->sorted           = 0;
21982593348eSBarry Smith   b->roworiented      = 1;
21992593348eSBarry Smith   b->nonew            = 0;
22002593348eSBarry Smith   b->diag             = 0;
22012593348eSBarry Smith   b->solve_work       = 0;
2202de6a44a3SBarry Smith   b->mult_work        = 0;
22032593348eSBarry Smith   b->spptr            = 0;
22044e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
22054e220ebcSLois Curfman McInnes 
22062593348eSBarry Smith   *A = B;
22072593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
22082593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
22092593348eSBarry Smith   return 0;
22102593348eSBarry Smith }
22112593348eSBarry Smith 
22125615d1e5SSatish Balay #undef __FUNC__
22135615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ"
2214b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
22152593348eSBarry Smith {
22162593348eSBarry Smith   Mat         C;
2217b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
22189df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2219de6a44a3SBarry Smith 
2220e3372554SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix");
22212593348eSBarry Smith 
22222593348eSBarry Smith   *B = 0;
2223d4bb536fSBarry Smith   PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView);
22242593348eSBarry Smith   PLogObjectCreate(C);
2225b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
22262593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
2227b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
2228b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
22292593348eSBarry Smith   C->factor     = A->factor;
22302593348eSBarry Smith   c->row        = 0;
22312593348eSBarry Smith   c->col        = 0;
22322593348eSBarry Smith   C->assembled  = PETSC_TRUE;
22332593348eSBarry Smith 
223444cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
223544cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
223644cd7ae7SLois Curfman McInnes   C->M          = a->m;
223744cd7ae7SLois Curfman McInnes   C->N          = a->n;
223844cd7ae7SLois Curfman McInnes 
2239b6490206SBarry Smith   c->bs         = a->bs;
22409df24120SSatish Balay   c->bs2        = a->bs2;
2241b6490206SBarry Smith   c->mbs        = a->mbs;
2242b6490206SBarry Smith   c->nbs        = a->nbs;
22432593348eSBarry Smith 
2244b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
2245b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
2246b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
22472593348eSBarry Smith     c->imax[i] = a->imax[i];
22482593348eSBarry Smith     c->ilen[i] = a->ilen[i];
22492593348eSBarry Smith   }
22502593348eSBarry Smith 
22512593348eSBarry Smith   /* allocate the matrix space */
22522593348eSBarry Smith   c->singlemalloc = 1;
22537e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
22542593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
22557e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
2256de6a44a3SBarry Smith   c->i  = c->j + nz;
2257b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
2258b6490206SBarry Smith   if (mbs > 0) {
2259de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
22602593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
22617e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
22622593348eSBarry Smith     }
22632593348eSBarry Smith   }
22642593348eSBarry Smith 
2265f09e8eb9SSatish Balay   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
22662593348eSBarry Smith   c->sorted      = a->sorted;
22672593348eSBarry Smith   c->roworiented = a->roworiented;
22682593348eSBarry Smith   c->nonew       = a->nonew;
22692593348eSBarry Smith 
22702593348eSBarry Smith   if (a->diag) {
2271b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
2272b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
2273b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
22742593348eSBarry Smith       c->diag[i] = a->diag[i];
22752593348eSBarry Smith     }
22762593348eSBarry Smith   }
22772593348eSBarry Smith   else c->diag          = 0;
22782593348eSBarry Smith   c->nz                 = a->nz;
22792593348eSBarry Smith   c->maxnz              = a->maxnz;
22802593348eSBarry Smith   c->solve_work         = 0;
22812593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
22827fc0212eSBarry Smith   c->mult_work          = 0;
22832593348eSBarry Smith   *B = C;
22842593348eSBarry Smith   return 0;
22852593348eSBarry Smith }
22862593348eSBarry Smith 
22875615d1e5SSatish Balay #undef __FUNC__
22885615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ"
228919bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
22902593348eSBarry Smith {
2291b6490206SBarry Smith   Mat_SeqBAIJ  *a;
22922593348eSBarry Smith   Mat          B;
2293de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
2294b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
229535aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2296a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
2297b6490206SBarry Smith   Scalar       *aa;
229819bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
22992593348eSBarry Smith 
23001a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2301de6a44a3SBarry Smith   bs2  = bs*bs;
2302b6490206SBarry Smith 
23032593348eSBarry Smith   MPI_Comm_size(comm,&size);
2304e3372554SBarry Smith   if (size > 1) SETERRQ(1,0,"view must have one processor");
230590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2306*0752156aSBarry Smith   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
2307e3372554SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object");
23082593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
23092593348eSBarry Smith 
2310e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
231135aab85fSBarry Smith 
231235aab85fSBarry Smith   /*
231335aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
231435aab85fSBarry Smith     divisible by the blocksize
231535aab85fSBarry Smith   */
2316b6490206SBarry Smith   mbs        = M/bs;
231735aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
231835aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
231935aab85fSBarry Smith   else                  mbs++;
232035aab85fSBarry Smith   if (extra_rows) {
2321537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
232235aab85fSBarry Smith   }
2323b6490206SBarry Smith 
23242593348eSBarry Smith   /* read in row lengths */
232535aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2326*0752156aSBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
232735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
23282593348eSBarry Smith 
2329b6490206SBarry Smith   /* read in column indices */
233035aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
2331*0752156aSBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
233235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
2333b6490206SBarry Smith 
2334b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
2335b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
2336b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
233735aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
233835aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
233935aab85fSBarry Smith   masked = mask + mbs;
2340b6490206SBarry Smith   rowcount = 0; nzcount = 0;
2341b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
234235aab85fSBarry Smith     nmask = 0;
2343b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2344b6490206SBarry Smith       kmax = rowlengths[rowcount];
2345b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
234635aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
234735aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2348b6490206SBarry Smith       }
2349b6490206SBarry Smith       rowcount++;
2350b6490206SBarry Smith     }
235135aab85fSBarry Smith     browlengths[i] += nmask;
235235aab85fSBarry Smith     /* zero out the mask elements we set */
235335aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2354b6490206SBarry Smith   }
2355b6490206SBarry Smith 
23562593348eSBarry Smith   /* create our matrix */
23573eee16b0SBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
23582593348eSBarry Smith   B = *A;
2359b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
23602593348eSBarry Smith 
23612593348eSBarry Smith   /* set matrix "i" values */
2362de6a44a3SBarry Smith   a->i[0] = 0;
2363b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2364b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2365b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
23662593348eSBarry Smith   }
2367b6490206SBarry Smith   a->nz         = 0;
2368b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
23692593348eSBarry Smith 
2370b6490206SBarry Smith   /* read in nonzero values */
237135aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2372*0752156aSBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
237335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2374b6490206SBarry Smith 
2375b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2376b6490206SBarry Smith   nzcount = 0; jcount = 0;
2377b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2378b6490206SBarry Smith     nzcountb = nzcount;
237935aab85fSBarry Smith     nmask    = 0;
2380b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2381b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2382b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
238335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
238435aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2385b6490206SBarry Smith       }
2386b6490206SBarry Smith       rowcount++;
2387b6490206SBarry Smith     }
2388de6a44a3SBarry Smith     /* sort the masked values */
238977c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2390de6a44a3SBarry Smith 
2391b6490206SBarry Smith     /* set "j" values into matrix */
2392b6490206SBarry Smith     maskcount = 1;
239335aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
239435aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2395de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2396b6490206SBarry Smith     }
2397b6490206SBarry Smith     /* set "a" values into matrix */
2398de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2399b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2400b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2401b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2402de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2403de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2404de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2405de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2406b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2407b6490206SBarry Smith       }
2408b6490206SBarry Smith     }
240935aab85fSBarry Smith     /* zero out the mask elements we set */
241035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2411b6490206SBarry Smith   }
2412e3372554SBarry Smith   if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix");
2413b6490206SBarry Smith 
2414b6490206SBarry Smith   PetscFree(rowlengths);
2415b6490206SBarry Smith   PetscFree(browlengths);
2416b6490206SBarry Smith   PetscFree(aa);
2417b6490206SBarry Smith   PetscFree(jj);
2418b6490206SBarry Smith   PetscFree(mask);
2419b6490206SBarry Smith 
2420b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2421de6a44a3SBarry Smith 
2422de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2423de6a44a3SBarry Smith   if (flg) {
242419bcc07fSBarry Smith     Viewer tviewer;
242519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2426639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
242719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
242819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2429de6a44a3SBarry Smith   }
2430de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2431de6a44a3SBarry Smith   if (flg) {
243219bcc07fSBarry Smith     Viewer tviewer;
243319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2434639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
243519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
243619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2437de6a44a3SBarry Smith   }
2438de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2439de6a44a3SBarry Smith   if (flg) {
244019bcc07fSBarry Smith     Viewer tviewer;
244119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
244219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
244319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2444de6a44a3SBarry Smith   }
2445de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2446de6a44a3SBarry Smith   if (flg) {
244719bcc07fSBarry Smith     Viewer tviewer;
244819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2449639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
245019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
245119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2452de6a44a3SBarry Smith   }
2453de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2454de6a44a3SBarry Smith   if (flg) {
245519bcc07fSBarry Smith     Viewer tviewer;
245619bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
245719bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
245819bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
245919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2460de6a44a3SBarry Smith   }
24612593348eSBarry Smith   return 0;
24622593348eSBarry Smith }
24632593348eSBarry Smith 
24642593348eSBarry Smith 
24652593348eSBarry Smith 
2466