xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2c3acbe9ddcb1424b0c1a2c5c7ff7d317d17c150)
12593348eSBarry Smith #ifndef lint
2*2c3acbe9SBarry Smith static char vcid[] = "$Id: baij.c,v 1.77 1996/12/02 20:13:16 balay Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
19de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
20de6a44a3SBarry Smith {
21de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
23de6a44a3SBarry Smith 
24de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
267fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
27de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28de6a44a3SBarry Smith       if (a->j[j] == i) {
29de6a44a3SBarry Smith         diag[i] = j;
30de6a44a3SBarry Smith         break;
31de6a44a3SBarry Smith       }
32de6a44a3SBarry Smith     }
33de6a44a3SBarry Smith   }
34de6a44a3SBarry Smith   a->diag = diag;
35de6a44a3SBarry Smith   return 0;
36de6a44a3SBarry Smith }
372593348eSBarry Smith 
382593348eSBarry Smith #include "draw.h"
392593348eSBarry Smith #include "pinclude/pviewer.h"
4077c4ece6SBarry Smith #include "sys.h"
412593348eSBarry Smith 
423b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
433b2fbd54SBarry Smith 
443b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
463b2fbd54SBarry Smith {
473b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
483b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
493b2fbd54SBarry Smith 
503b2fbd54SBarry Smith   *nn = n;
513b2fbd54SBarry Smith   if (!ia) return 0;
523b2fbd54SBarry Smith   if (symmetric) {
533b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
543b2fbd54SBarry Smith   } else if (oshift == 1) {
553b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
563b2fbd54SBarry Smith     int nz = a->i[n] + 1;
573b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
583b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
593b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
603b2fbd54SBarry Smith   } else {
613b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
623b2fbd54SBarry Smith   }
633b2fbd54SBarry Smith 
643b2fbd54SBarry Smith   return 0;
653b2fbd54SBarry Smith }
663b2fbd54SBarry Smith 
673b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
683b2fbd54SBarry Smith                                 PetscTruth *done)
693b2fbd54SBarry Smith {
703b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
713b2fbd54SBarry Smith   int         i,n = a->mbs;
723b2fbd54SBarry Smith 
733b2fbd54SBarry Smith   if (!ia) return 0;
743b2fbd54SBarry Smith   if (symmetric) {
753b2fbd54SBarry Smith     PetscFree(*ia);
763b2fbd54SBarry Smith     PetscFree(*ja);
77af5da2bfSBarry Smith   } else if (oshift == 1) {
783b2fbd54SBarry Smith     int nz = a->i[n];
793b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
803b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
813b2fbd54SBarry Smith   }
823b2fbd54SBarry Smith   return 0;
833b2fbd54SBarry Smith }
843b2fbd54SBarry Smith 
853b2fbd54SBarry Smith 
86b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
872593348eSBarry Smith {
88b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
899df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90b6490206SBarry Smith   Scalar      *aa;
912593348eSBarry Smith 
9290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
932593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
942593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
953b2fbd54SBarry Smith 
962593348eSBarry Smith   col_lens[1] = a->m;
972593348eSBarry Smith   col_lens[2] = a->n;
987e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
992593348eSBarry Smith 
1002593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
101b6490206SBarry Smith   count = 0;
102b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
103b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
104b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105b6490206SBarry Smith     }
1062593348eSBarry Smith   }
10777c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1082593348eSBarry Smith   PetscFree(col_lens);
1092593348eSBarry Smith 
1102593348eSBarry Smith   /* store column indices (zero start index) */
1117e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112b6490206SBarry Smith   count = 0;
113b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
114b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
115b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
117b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1182593348eSBarry Smith         }
1192593348eSBarry Smith       }
120b6490206SBarry Smith     }
121b6490206SBarry Smith   }
1227e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123b6490206SBarry Smith   PetscFree(jj);
1242593348eSBarry Smith 
1252593348eSBarry Smith   /* store nonzero values */
1267e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127b6490206SBarry Smith   count = 0;
128b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
129b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
130b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1327e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
133b6490206SBarry Smith         }
134b6490206SBarry Smith       }
135b6490206SBarry Smith     }
136b6490206SBarry Smith   }
1377e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138b6490206SBarry Smith   PetscFree(aa);
1392593348eSBarry Smith   return 0;
1402593348eSBarry Smith }
1412593348eSBarry Smith 
142b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1432593348eSBarry Smith {
144b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1459df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1462593348eSBarry Smith   FILE        *fd;
1472593348eSBarry Smith   char        *outputname;
1482593348eSBarry Smith 
14990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1502593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
15190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
152639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1537ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1542593348eSBarry Smith   }
155639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1572593348eSBarry Smith   }
158639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
15944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
16044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
16144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
16244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
16344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
16444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
16544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
16644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
16744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
16844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
16944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
17044cd7ae7SLois Curfman McInnes #else
17144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
17244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
17344cd7ae7SLois Curfman McInnes #endif
17444cd7ae7SLois Curfman McInnes           }
17544cd7ae7SLois Curfman McInnes         }
17644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
17744cd7ae7SLois Curfman McInnes       }
17844cd7ae7SLois Curfman McInnes     }
17944cd7ae7SLois Curfman McInnes   }
1802593348eSBarry Smith   else {
181b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
182b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
183b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
184b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
18688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1877e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
18888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1897e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
19088685aaeSLois Curfman McInnes           }
19188685aaeSLois Curfman McInnes           else {
1927e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
19388685aaeSLois Curfman McInnes           }
19488685aaeSLois Curfman McInnes #else
1957e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
19688685aaeSLois Curfman McInnes #endif
1972593348eSBarry Smith           }
1982593348eSBarry Smith         }
1992593348eSBarry Smith         fprintf(fd,"\n");
2002593348eSBarry Smith       }
2012593348eSBarry Smith     }
202b6490206SBarry Smith   }
2032593348eSBarry Smith   fflush(fd);
2042593348eSBarry Smith   return 0;
2052593348eSBarry Smith }
2062593348eSBarry Smith 
2073270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2083270192aSSatish Balay {
2093270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2103270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2113270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2123270192aSSatish Balay   Scalar       *aa;
2133270192aSSatish Balay   Draw         draw;
2143270192aSSatish Balay   DrawButton   button;
2153270192aSSatish Balay   PetscTruth   isnull;
2163270192aSSatish Balay 
2173270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2183270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2193270192aSSatish Balay 
2203270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2213270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2223270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2233270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2243270192aSSatish Balay   color = DRAW_BLUE;
2253270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2263270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2273270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2283270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2293270192aSSatish Balay       aa = a->a + j*bs2;
2303270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2313270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2323270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2333270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2343270192aSSatish Balay         }
2353270192aSSatish Balay       }
2363270192aSSatish Balay     }
2373270192aSSatish Balay   }
2383270192aSSatish Balay   color = DRAW_CYAN;
2393270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2403270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2413270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2423270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2433270192aSSatish Balay       aa = a->a + j*bs2;
2443270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2453270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2463270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2473270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2483270192aSSatish Balay         }
2493270192aSSatish Balay       }
2503270192aSSatish Balay     }
2513270192aSSatish Balay   }
2523270192aSSatish Balay 
2533270192aSSatish Balay   color = DRAW_RED;
2543270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2553270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2563270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2573270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2583270192aSSatish Balay       aa = a->a + j*bs2;
2593270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2603270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2613270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2623270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2633270192aSSatish Balay         }
2643270192aSSatish Balay       }
2653270192aSSatish Balay     }
2663270192aSSatish Balay   }
2673270192aSSatish Balay 
2683270192aSSatish Balay   DrawFlush(draw);
2693270192aSSatish Balay   DrawGetPause(draw,&pause);
2703270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2713270192aSSatish Balay 
2723270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2733270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2743270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2753270192aSSatish Balay     DrawClear(draw);
2763270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2773270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2783270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2793270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2803270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2813270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2823270192aSSatish Balay     w *= scale; h *= scale;
2833270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2843270192aSSatish Balay     color = DRAW_BLUE;
2853270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2863270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2873270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2883270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
2893270192aSSatish Balay         aa = a->a + j*bs2;
2903270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
2913270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
2923270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
2933270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2943270192aSSatish Balay           }
2953270192aSSatish Balay         }
2963270192aSSatish Balay       }
2973270192aSSatish Balay     }
2983270192aSSatish Balay     color = DRAW_CYAN;
2993270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3003270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3013270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3023270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3033270192aSSatish Balay         aa = a->a + j*bs2;
3043270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3053270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3063270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3073270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3083270192aSSatish Balay           }
3093270192aSSatish Balay         }
3103270192aSSatish Balay       }
3113270192aSSatish Balay     }
3123270192aSSatish Balay 
3133270192aSSatish Balay     color = DRAW_RED;
3143270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3153270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3163270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3173270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3183270192aSSatish Balay         aa = a->a + j*bs2;
3193270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3203270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3213270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3223270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3233270192aSSatish Balay           }
3243270192aSSatish Balay         }
3253270192aSSatish Balay       }
3263270192aSSatish Balay     }
3273270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3283270192aSSatish Balay   }
3293270192aSSatish Balay   return 0;
3303270192aSSatish Balay }
3313270192aSSatish Balay 
332b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3332593348eSBarry Smith {
3342593348eSBarry Smith   Mat         A = (Mat) obj;
33519bcc07fSBarry Smith   ViewerType  vtype;
33619bcc07fSBarry Smith   int         ierr;
3372593348eSBarry Smith 
33819bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
33919bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
340b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3412593348eSBarry Smith   }
34219bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3442593348eSBarry Smith   }
34519bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
346b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3472593348eSBarry Smith   }
34819bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3493270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3502593348eSBarry Smith   }
3512593348eSBarry Smith   return 0;
3522593348eSBarry Smith }
353b6490206SBarry Smith 
354119a934aSSatish Balay #define CHUNKSIZE  10
355cd0e1443SSatish Balay 
356639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
357cd0e1443SSatish Balay {
358cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
359cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
360cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
361a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3629df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
363cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
364cd0e1443SSatish Balay 
365cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
366cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3673b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
368cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
369cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
3703b2fbd54SBarry Smith #endif
371*2c3acbe9SBarry Smith     rp   = aj + ai[brow];
372*2c3acbe9SBarry Smith     ap   = aa + bs2*ai[brow];
373*2c3acbe9SBarry Smith     rmax = imax[brow];
374*2c3acbe9SBarry Smith     nrow = ailen[brow];
375cd0e1443SSatish Balay     low  = 0;
376cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3773b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
378cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
379cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
3803b2fbd54SBarry Smith #endif
381a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
382cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
383cd0e1443SSatish Balay       if (roworiented) {
384cd0e1443SSatish Balay         value = *v++;
385cd0e1443SSatish Balay       }
386cd0e1443SSatish Balay       else {
387cd0e1443SSatish Balay         value = v[k + l*m];
388cd0e1443SSatish Balay       }
389cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
390*2c3acbe9SBarry Smith       while (high-low > 7) {
391cd0e1443SSatish Balay         t = (low+high)/2;
392cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
393cd0e1443SSatish Balay         else              low  = t;
394cd0e1443SSatish Balay       }
395cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
396cd0e1443SSatish Balay         if (rp[i] > bcol) break;
397cd0e1443SSatish Balay         if (rp[i] == bcol) {
3987e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
399cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
400cd0e1443SSatish Balay           else                  *bap  = value;
401cd0e1443SSatish Balay           goto noinsert;
402cd0e1443SSatish Balay         }
403cd0e1443SSatish Balay       }
404cd0e1443SSatish Balay       if (nonew) goto noinsert;
405cd0e1443SSatish Balay       if (nrow >= rmax) {
406cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
407cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
408cd0e1443SSatish Balay         Scalar *new_a;
409cd0e1443SSatish Balay 
410cd0e1443SSatish Balay         /* malloc new storage space */
4117e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
412cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4137e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
414cd0e1443SSatish Balay         new_i   = new_j + new_nz;
415cd0e1443SSatish Balay 
416cd0e1443SSatish Balay         /* copy over old data into new slots */
417cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4187e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
419a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
420a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
421a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
422cd0e1443SSatish Balay                                                            len*sizeof(int));
423a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
424a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
425a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
426a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
427cd0e1443SSatish Balay         /* free up old matrix storage */
428cd0e1443SSatish Balay         PetscFree(a->a);
429cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
430cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
431cd0e1443SSatish Balay         a->singlemalloc = 1;
432cd0e1443SSatish Balay 
433a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
434cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4357e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
43618e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
437cd0e1443SSatish Balay         a->reallocs++;
438119a934aSSatish Balay         a->nz++;
439cd0e1443SSatish Balay       }
4407e67e3f9SSatish Balay       N = nrow++ - 1;
441cd0e1443SSatish Balay       /* shift up all the later entries in this row */
442cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
443cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4447e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
445cd0e1443SSatish Balay       }
4467e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
447cd0e1443SSatish Balay       rp[i]                      = bcol;
4487e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
449cd0e1443SSatish Balay       noinsert:;
450cd0e1443SSatish Balay       low = i;
451cd0e1443SSatish Balay     }
452cd0e1443SSatish Balay     ailen[brow] = nrow;
453cd0e1443SSatish Balay   }
454cd0e1443SSatish Balay   return 0;
455cd0e1443SSatish Balay }
456cd0e1443SSatish Balay 
4570b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4580b824a48SSatish Balay {
4590b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4600b824a48SSatish Balay   *m = a->m; *n = a->n;
4610b824a48SSatish Balay   return 0;
4620b824a48SSatish Balay }
4630b824a48SSatish Balay 
4640b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4650b824a48SSatish Balay {
4660b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4670b824a48SSatish Balay   *m = 0; *n = a->m;
4680b824a48SSatish Balay   return 0;
4690b824a48SSatish Balay }
4700b824a48SSatish Balay 
4719867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4729867e207SSatish Balay {
4739867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4747e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4759867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4769867e207SSatish Balay 
4779867e207SSatish Balay   bs  = a->bs;
4789867e207SSatish Balay   ai  = a->i;
4799867e207SSatish Balay   aj  = a->j;
4809867e207SSatish Balay   aa  = a->a;
4819df24120SSatish Balay   bs2 = a->bs2;
4829867e207SSatish Balay 
4839867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4849867e207SSatish Balay 
4859867e207SSatish Balay   bn  = row/bs;   /* Block number */
4869867e207SSatish Balay   bp  = row % bs; /* Block Position */
4879867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4889867e207SSatish Balay   *nz = bs*M;
4899867e207SSatish Balay 
4909867e207SSatish Balay   if (v) {
4919867e207SSatish Balay     *v = 0;
4929867e207SSatish Balay     if (*nz) {
4939867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
4949867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
4959867e207SSatish Balay         v_i  = *v + i*bs;
4967e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
4977e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
4989867e207SSatish Balay       }
4999867e207SSatish Balay     }
5009867e207SSatish Balay   }
5019867e207SSatish Balay 
5029867e207SSatish Balay   if (idx) {
5039867e207SSatish Balay     *idx = 0;
5049867e207SSatish Balay     if (*nz) {
5059867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5069867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5079867e207SSatish Balay         idx_i = *idx + i*bs;
5089867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5099867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5109867e207SSatish Balay       }
5119867e207SSatish Balay     }
5129867e207SSatish Balay   }
5139867e207SSatish Balay   return 0;
5149867e207SSatish Balay }
5159867e207SSatish Balay 
5169867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5179867e207SSatish Balay {
5189867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5199867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5209867e207SSatish Balay   return 0;
5219867e207SSatish Balay }
522b6490206SBarry Smith 
523f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
524f2501298SSatish Balay {
525f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
526f2501298SSatish Balay   Mat         C;
527f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5289df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
529f2501298SSatish Balay   Scalar      *array=a->a;
530f2501298SSatish Balay 
531f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
532f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
533f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
534f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
535a7c10996SSatish Balay 
536a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
537f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
538f2501298SSatish Balay   PetscFree(col);
539f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
540f2501298SSatish Balay   cols = rows + bs;
541f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
542f2501298SSatish Balay     cols[0] = i*bs;
543f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
544f2501298SSatish Balay     len = ai[i+1] - ai[i];
545f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
546f2501298SSatish Balay       rows[0] = (*aj++)*bs;
547f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
548f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
549f2501298SSatish Balay       array += bs2;
550f2501298SSatish Balay     }
551f2501298SSatish Balay   }
5521073c447SSatish Balay   PetscFree(rows);
553f2501298SSatish Balay 
5546d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5556d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
556f2501298SSatish Balay 
557f2501298SSatish Balay   if (B != PETSC_NULL) {
558f2501298SSatish Balay     *B = C;
559f2501298SSatish Balay   } else {
560f2501298SSatish Balay     /* This isn't really an in-place transpose */
561f2501298SSatish Balay     PetscFree(a->a);
562f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
563f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
564f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
565f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
566f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
567f2501298SSatish Balay     PetscFree(a);
568f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
569f2501298SSatish Balay     PetscHeaderDestroy(C);
570f2501298SSatish Balay   }
571f2501298SSatish Balay   return 0;
572f2501298SSatish Balay }
573f2501298SSatish Balay 
574f2501298SSatish Balay 
575584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
576584200bdSSatish Balay {
577584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
578584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
579a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5809df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
581584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
582584200bdSSatish Balay 
5836d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
584584200bdSSatish Balay 
585584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
586584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
587584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
588584200bdSSatish Balay     if (fshift) {
589a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
590584200bdSSatish Balay       N = ailen[i];
591584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
592584200bdSSatish Balay         ip[j-fshift] = ip[j];
5937e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
594584200bdSSatish Balay       }
595584200bdSSatish Balay     }
596584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
597584200bdSSatish Balay   }
598584200bdSSatish Balay   if (mbs) {
599584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
600584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
601584200bdSSatish Balay   }
602584200bdSSatish Balay   /* reset ilen and imax for each row */
603584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
604584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
605584200bdSSatish Balay   }
606a7c10996SSatish Balay   a->nz = ai[mbs];
607584200bdSSatish Balay 
608584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
609584200bdSSatish Balay   if (fshift && a->diag) {
610584200bdSSatish Balay     PetscFree(a->diag);
611584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
612584200bdSSatish Balay     a->diag = 0;
613584200bdSSatish Balay   }
6143d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
615219d9a1aSLois Curfman McInnes            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
6163d2013a6SLois Curfman McInnes   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
617584200bdSSatish Balay            a->reallocs);
618e2f3b5e9SSatish Balay   a->reallocs          = 0;
6194e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
6204e220ebcSLois Curfman McInnes 
621584200bdSSatish Balay   return 0;
622584200bdSSatish Balay }
623584200bdSSatish Balay 
624b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6252593348eSBarry Smith {
626b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6279df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6282593348eSBarry Smith   return 0;
6292593348eSBarry Smith }
6302593348eSBarry Smith 
631b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6322593348eSBarry Smith {
6332593348eSBarry Smith   Mat         A  = (Mat) obj;
634b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
63590f02eecSBarry Smith   int         ierr;
6362593348eSBarry Smith 
6372593348eSBarry Smith #if defined(PETSC_LOG)
6382593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6392593348eSBarry Smith #endif
6402593348eSBarry Smith   PetscFree(a->a);
6412593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6422593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6432593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6442593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6452593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
646de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6472593348eSBarry Smith   PetscFree(a);
64890f02eecSBarry Smith   if (A->mapping) {
64990f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
65090f02eecSBarry Smith   }
651f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
652f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6532593348eSBarry Smith   return 0;
6542593348eSBarry Smith }
6552593348eSBarry Smith 
656b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6572593348eSBarry Smith {
658b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6596d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6606d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6616d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
662219d9a1aSLois Curfman McInnes   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
6636d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6646d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6656d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
666219d9a1aSLois Curfman McInnes            op == MAT_ROWS_UNSORTED ||
6676d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6686d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
66990f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
67090f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
67194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6726d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6736d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6742593348eSBarry Smith   else
675b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6762593348eSBarry Smith   return 0;
6772593348eSBarry Smith }
6782593348eSBarry Smith 
6792593348eSBarry Smith 
6802593348eSBarry Smith /* -------------------------------------------------------*/
6812593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6822593348eSBarry Smith /* -------------------------------------------------------*/
683b6490206SBarry Smith #include "pinclude/plapack.h"
684b6490206SBarry Smith 
68539b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6862593348eSBarry Smith {
687b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
68839b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
689c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
6902593348eSBarry Smith 
691c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
692c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
693b6490206SBarry Smith 
694119a934aSSatish Balay   idx   = a->j;
695119a934aSSatish Balay   v     = a->a;
696119a934aSSatish Balay   ii    = a->i;
697119a934aSSatish Balay 
698119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
699119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
700119a934aSSatish Balay     sum  = 0.0;
701119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
7021a6a6d98SBarry Smith     z[i] = sum;
703119a934aSSatish Balay   }
704c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
705c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
70639b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
70739b95ed1SBarry Smith   return 0;
70839b95ed1SBarry Smith }
70939b95ed1SBarry Smith 
71039b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
71139b95ed1SBarry Smith {
71239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
71339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
71439b95ed1SBarry Smith   register Scalar x1,x2;
71539b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
716c16cb8f2SBarry Smith   int             j,n;
71739b95ed1SBarry Smith 
718c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
719c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
72039b95ed1SBarry Smith 
72139b95ed1SBarry Smith   idx   = a->j;
72239b95ed1SBarry Smith   v     = a->a;
72339b95ed1SBarry Smith   ii    = a->i;
72439b95ed1SBarry Smith 
725119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
726119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
727119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
728119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
729119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
730119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
731119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
732119a934aSSatish Balay       v += 4;
733119a934aSSatish Balay     }
7341a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
735119a934aSSatish Balay     z += 2;
736119a934aSSatish Balay   }
737c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
738c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
73939b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
74039b95ed1SBarry Smith   return 0;
74139b95ed1SBarry Smith }
74239b95ed1SBarry Smith 
74339b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
74439b95ed1SBarry Smith {
74539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
74639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
747c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
74839b95ed1SBarry Smith 
749c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
750c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
75139b95ed1SBarry Smith 
75239b95ed1SBarry Smith   idx   = a->j;
75339b95ed1SBarry Smith   v     = a->a;
75439b95ed1SBarry Smith   ii    = a->i;
75539b95ed1SBarry Smith 
756119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
757119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
758119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
759119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
760119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
761119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
762119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
763119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
764119a934aSSatish Balay       v += 9;
765119a934aSSatish Balay     }
7661a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
767119a934aSSatish Balay     z += 3;
768119a934aSSatish Balay   }
769c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
770c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
77139b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
77239b95ed1SBarry Smith   return 0;
77339b95ed1SBarry Smith }
77439b95ed1SBarry Smith 
77539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
77639b95ed1SBarry Smith {
77739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77939b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
78039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
781c16cb8f2SBarry Smith   int             j,n;
78239b95ed1SBarry Smith 
783c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
784c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
78539b95ed1SBarry Smith 
78639b95ed1SBarry Smith   idx   = a->j;
78739b95ed1SBarry Smith   v     = a->a;
78839b95ed1SBarry Smith   ii    = a->i;
78939b95ed1SBarry Smith 
790119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
791119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
792119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
793119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
794119a934aSSatish Balay       xb = x + 4*(*idx++);
795119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
796119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
797119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
798119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
799119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
800119a934aSSatish Balay       v += 16;
801119a934aSSatish Balay     }
8021a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
803119a934aSSatish Balay     z += 4;
804119a934aSSatish Balay   }
805c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
806c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
80739b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80839b95ed1SBarry Smith   return 0;
80939b95ed1SBarry Smith }
81039b95ed1SBarry Smith 
81139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
81239b95ed1SBarry Smith {
81339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
81439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
81539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
816c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
81739b95ed1SBarry Smith 
818c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
819c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
82039b95ed1SBarry Smith 
82139b95ed1SBarry Smith   idx   = a->j;
82239b95ed1SBarry Smith   v     = a->a;
82339b95ed1SBarry Smith   ii    = a->i;
82439b95ed1SBarry Smith 
825119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
826119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
827119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
828119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
829119a934aSSatish Balay       xb = x + 5*(*idx++);
830119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
831119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
832119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
833119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
834119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
835119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
836119a934aSSatish Balay       v += 25;
837119a934aSSatish Balay     }
8381a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
839119a934aSSatish Balay     z += 5;
840119a934aSSatish Balay   }
841c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
842c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
84339b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
84439b95ed1SBarry Smith   return 0;
84539b95ed1SBarry Smith }
84639b95ed1SBarry Smith 
84739b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84839b95ed1SBarry Smith {
84939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
85039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
851c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
852c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
853c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
85439b95ed1SBarry Smith 
855c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
856c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
85739b95ed1SBarry Smith 
85839b95ed1SBarry Smith   idx   = a->j;
85939b95ed1SBarry Smith   v     = a->a;
86039b95ed1SBarry Smith   ii    = a->i;
86139b95ed1SBarry Smith 
86239b95ed1SBarry Smith 
863119a934aSSatish Balay   if (!a->mult_work) {
8643b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8652b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
866119a934aSSatish Balay   }
867119a934aSSatish Balay   work = a->mult_work;
868119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
869119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
870119a934aSSatish Balay     ncols = n*bs;
871119a934aSSatish Balay     workt = work;
872119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
873119a934aSSatish Balay       xb = x + bs*(*idx++);
874119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
875119a934aSSatish Balay       workt += bs;
876119a934aSSatish Balay     }
8771a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
878119a934aSSatish Balay     v += n*bs2;
879119a934aSSatish Balay     z += bs;
880119a934aSSatish Balay   }
881c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
882c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
8831a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
884bb42667fSSatish Balay   return 0;
885bb42667fSSatish Balay }
886bb42667fSSatish Balay 
887f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
888f44d3a6dSSatish Balay {
889f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
890f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
891c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
892f44d3a6dSSatish Balay 
893c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
894c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
895c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
896f44d3a6dSSatish Balay 
897f44d3a6dSSatish Balay   idx   = a->j;
898f44d3a6dSSatish Balay   v     = a->a;
899f44d3a6dSSatish Balay   ii    = a->i;
900f44d3a6dSSatish Balay 
901f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
902f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
903f44d3a6dSSatish Balay     sum  = y[i];
904f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
905f44d3a6dSSatish Balay     z[i] = sum;
906f44d3a6dSSatish Balay   }
907c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
908c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
909c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
910f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
911f44d3a6dSSatish Balay   return 0;
912f44d3a6dSSatish Balay }
913f44d3a6dSSatish Balay 
914f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
915f44d3a6dSSatish Balay {
916f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
917f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
918f44d3a6dSSatish Balay   register Scalar x1,x2;
919f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
920c16cb8f2SBarry Smith   int             j,n;
921f44d3a6dSSatish Balay 
922c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
923c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
924c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
925f44d3a6dSSatish Balay 
926f44d3a6dSSatish Balay   idx   = a->j;
927f44d3a6dSSatish Balay   v     = a->a;
928f44d3a6dSSatish Balay   ii    = a->i;
929f44d3a6dSSatish Balay 
930f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
931f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
932f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
933f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
934f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
935f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
936f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
937f44d3a6dSSatish Balay       v += 4;
938f44d3a6dSSatish Balay     }
939f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
940f44d3a6dSSatish Balay     z += 2; y += 2;
941f44d3a6dSSatish Balay   }
942c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
943c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
944c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
945f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
946f44d3a6dSSatish Balay   return 0;
947f44d3a6dSSatish Balay }
948f44d3a6dSSatish Balay 
949f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
950f44d3a6dSSatish Balay {
951f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
952f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
953c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
954f44d3a6dSSatish Balay 
955c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
956c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
957c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
958f44d3a6dSSatish Balay 
959f44d3a6dSSatish Balay   idx   = a->j;
960f44d3a6dSSatish Balay   v     = a->a;
961f44d3a6dSSatish Balay   ii    = a->i;
962f44d3a6dSSatish Balay 
963f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
964f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
965f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
966f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
967f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
968f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
969f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
970f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
971f44d3a6dSSatish Balay       v += 9;
972f44d3a6dSSatish Balay     }
973f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
974f44d3a6dSSatish Balay     z += 3; y += 3;
975f44d3a6dSSatish Balay   }
976c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
977c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
978c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
979f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
980f44d3a6dSSatish Balay   return 0;
981f44d3a6dSSatish Balay }
982f44d3a6dSSatish Balay 
983f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
984f44d3a6dSSatish Balay {
985f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
986f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
987f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
988f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
989c16cb8f2SBarry Smith   int             j,n;
990f44d3a6dSSatish Balay 
991c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
992c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
993c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
994f44d3a6dSSatish Balay 
995f44d3a6dSSatish Balay   idx   = a->j;
996f44d3a6dSSatish Balay   v     = a->a;
997f44d3a6dSSatish Balay   ii    = a->i;
998f44d3a6dSSatish Balay 
999f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1000f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1001f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1002f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1003f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1004f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1005f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1006f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1007f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1008f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1009f44d3a6dSSatish Balay       v += 16;
1010f44d3a6dSSatish Balay     }
1011f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1012f44d3a6dSSatish Balay     z += 4; y += 4;
1013f44d3a6dSSatish Balay   }
1014c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1015c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1016c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1017f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1018f44d3a6dSSatish Balay   return 0;
1019f44d3a6dSSatish Balay }
1020f44d3a6dSSatish Balay 
1021f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1022f44d3a6dSSatish Balay {
1023f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1024f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1025f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1026c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1027f44d3a6dSSatish Balay 
1028c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1029c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1030c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1031f44d3a6dSSatish Balay 
1032f44d3a6dSSatish Balay   idx   = a->j;
1033f44d3a6dSSatish Balay   v     = a->a;
1034f44d3a6dSSatish Balay   ii    = a->i;
1035f44d3a6dSSatish Balay 
1036f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1037f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1038f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1039f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1040f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1041f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1042f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1043f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1044f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1045f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1046f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1047f44d3a6dSSatish Balay       v += 25;
1048f44d3a6dSSatish Balay     }
1049f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1050f44d3a6dSSatish Balay     z += 5; y += 5;
1051f44d3a6dSSatish Balay   }
1052c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1053c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1054c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1055f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1056f44d3a6dSSatish Balay   return 0;
1057f44d3a6dSSatish Balay }
1058f44d3a6dSSatish Balay 
1059f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1060f44d3a6dSSatish Balay {
1061f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1062f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1063f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1064f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1065f44d3a6dSSatish Balay 
1066f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1067f44d3a6dSSatish Balay 
1068c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1069c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1070f44d3a6dSSatish Balay 
1071f44d3a6dSSatish Balay   idx   = a->j;
1072f44d3a6dSSatish Balay   v     = a->a;
1073f44d3a6dSSatish Balay   ii    = a->i;
1074f44d3a6dSSatish Balay 
1075f44d3a6dSSatish Balay 
1076f44d3a6dSSatish Balay   if (!a->mult_work) {
1077f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1078f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1079f44d3a6dSSatish Balay   }
1080f44d3a6dSSatish Balay   work = a->mult_work;
1081f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1082f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1083f44d3a6dSSatish Balay     ncols = n*bs;
1084f44d3a6dSSatish Balay     workt = work;
1085f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1086f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1087f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1088f44d3a6dSSatish Balay       workt += bs;
1089f44d3a6dSSatish Balay     }
1090f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1091f44d3a6dSSatish Balay     v += n*bs2;
1092f44d3a6dSSatish Balay     z += bs;
1093f44d3a6dSSatish Balay   }
1094c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1095c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1096f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1097f44d3a6dSSatish Balay   return 0;
1098f44d3a6dSSatish Balay }
1099f44d3a6dSSatish Balay 
11001a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1101bb42667fSSatish Balay {
1102bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
11031a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1104bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1105bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11069df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1107119a934aSSatish Balay 
1108119a934aSSatish Balay 
110990f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
111090f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1111bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1112bb42667fSSatish Balay 
1113119a934aSSatish Balay   idx   = a->j;
1114119a934aSSatish Balay   v     = a->a;
1115119a934aSSatish Balay   ii    = a->i;
1116119a934aSSatish Balay 
1117119a934aSSatish Balay   switch (bs) {
1118119a934aSSatish Balay   case 1:
1119119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1120119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1121119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1122119a934aSSatish Balay       ib = idx + ai[i];
1123119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1124bb1453f0SSatish Balay         rval    = ib[j];
1125bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1126119a934aSSatish Balay       }
1127119a934aSSatish Balay     }
1128119a934aSSatish Balay     break;
1129119a934aSSatish Balay   case 2:
1130119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1131119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1132119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1133119a934aSSatish Balay       ib = idx + ai[i];
1134119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1135119a934aSSatish Balay         rval      = ib[j]*2;
1136bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1137bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1138119a934aSSatish Balay         v += 4;
1139119a934aSSatish Balay       }
1140119a934aSSatish Balay     }
1141119a934aSSatish Balay     break;
1142119a934aSSatish Balay   case 3:
1143119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1144119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1145119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1146119a934aSSatish Balay       ib = idx + ai[i];
1147119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1148119a934aSSatish Balay         rval      = ib[j]*3;
1149bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1150bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1151bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1152119a934aSSatish Balay         v += 9;
1153119a934aSSatish Balay       }
1154119a934aSSatish Balay     }
1155119a934aSSatish Balay     break;
1156119a934aSSatish Balay   case 4:
1157119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1158119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1159119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1160119a934aSSatish Balay       ib = idx + ai[i];
1161119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1162119a934aSSatish Balay         rval      = ib[j]*4;
1163bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1164bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1165bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1166bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1167119a934aSSatish Balay         v += 16;
1168119a934aSSatish Balay       }
1169119a934aSSatish Balay     }
1170119a934aSSatish Balay     break;
1171119a934aSSatish Balay   case 5:
1172119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1173119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1174119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1175119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1176119a934aSSatish Balay       ib = idx + ai[i];
1177119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1178119a934aSSatish Balay         rval      = ib[j]*5;
1179bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1180bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1181bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1182bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1183bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1184119a934aSSatish Balay         v += 25;
1185119a934aSSatish Balay       }
1186119a934aSSatish Balay     }
1187119a934aSSatish Balay     break;
1188119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1189119a934aSSatish Balay     default: {
1190119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1191119a934aSSatish Balay       if (!a->mult_work) {
11923b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1193bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1194119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1195119a934aSSatish Balay       }
1196119a934aSSatish Balay       work = a->mult_work;
1197119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1198119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1199119a934aSSatish Balay         ncols = n*bs;
1200119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1201119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1202119a934aSSatish Balay         v += n*bs2;
1203119a934aSSatish Balay         x += bs;
1204119a934aSSatish Balay         workt = work;
1205119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1206119a934aSSatish Balay           zb = z + bs*(*idx++);
1207bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1208119a934aSSatish Balay           workt += bs;
1209119a934aSSatish Balay         }
1210119a934aSSatish Balay       }
1211119a934aSSatish Balay     }
1212119a934aSSatish Balay   }
12130419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12140419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1215faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1216faf6580fSSatish Balay   return 0;
1217faf6580fSSatish Balay }
1218faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219faf6580fSSatish Balay {
1220faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1221faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1222faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1223faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1224faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1225faf6580fSSatish Balay 
1226faf6580fSSatish Balay 
1227faf6580fSSatish Balay 
122890f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
122990f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1230faf6580fSSatish Balay 
1231648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1232648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1233648c1d95SSatish Balay 
1234faf6580fSSatish Balay 
1235faf6580fSSatish Balay   idx   = a->j;
1236faf6580fSSatish Balay   v     = a->a;
1237faf6580fSSatish Balay   ii    = a->i;
1238faf6580fSSatish Balay 
1239faf6580fSSatish Balay   switch (bs) {
1240faf6580fSSatish Balay   case 1:
1241faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1242faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1243faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1244faf6580fSSatish Balay       ib = idx + ai[i];
1245faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1246faf6580fSSatish Balay         rval    = ib[j];
1247faf6580fSSatish Balay         z[rval] += *v++ * x1;
1248faf6580fSSatish Balay       }
1249faf6580fSSatish Balay     }
1250faf6580fSSatish Balay     break;
1251faf6580fSSatish Balay   case 2:
1252faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1253faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1254faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1255faf6580fSSatish Balay       ib = idx + ai[i];
1256faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1257faf6580fSSatish Balay         rval      = ib[j]*2;
1258faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1259faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1260faf6580fSSatish Balay         v += 4;
1261faf6580fSSatish Balay       }
1262faf6580fSSatish Balay     }
1263faf6580fSSatish Balay     break;
1264faf6580fSSatish Balay   case 3:
1265faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1266faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1267faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1268faf6580fSSatish Balay       ib = idx + ai[i];
1269faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1270faf6580fSSatish Balay         rval      = ib[j]*3;
1271faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1272faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1273faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1274faf6580fSSatish Balay         v += 9;
1275faf6580fSSatish Balay       }
1276faf6580fSSatish Balay     }
1277faf6580fSSatish Balay     break;
1278faf6580fSSatish Balay   case 4:
1279faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1280faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1281faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1282faf6580fSSatish Balay       ib = idx + ai[i];
1283faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1284faf6580fSSatish Balay         rval      = ib[j]*4;
1285faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1286faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1287faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1288faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1289faf6580fSSatish Balay         v += 16;
1290faf6580fSSatish Balay       }
1291faf6580fSSatish Balay     }
1292faf6580fSSatish Balay     break;
1293faf6580fSSatish Balay   case 5:
1294faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1295faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1296faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1297faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1298faf6580fSSatish Balay       ib = idx + ai[i];
1299faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1300faf6580fSSatish Balay         rval      = ib[j]*5;
1301faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1302faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1303faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1304faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1305faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1306faf6580fSSatish Balay         v += 25;
1307faf6580fSSatish Balay       }
1308faf6580fSSatish Balay     }
1309faf6580fSSatish Balay     break;
1310faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1311faf6580fSSatish Balay     default: {
1312faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1313faf6580fSSatish Balay       if (!a->mult_work) {
1314faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1315faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1316faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1317faf6580fSSatish Balay       }
1318faf6580fSSatish Balay       work = a->mult_work;
1319faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1320faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1321faf6580fSSatish Balay         ncols = n*bs;
1322faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1323faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1324faf6580fSSatish Balay         v += n*bs2;
1325faf6580fSSatish Balay         x += bs;
1326faf6580fSSatish Balay         workt = work;
1327faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1328faf6580fSSatish Balay           zb = z + bs*(*idx++);
1329faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1330faf6580fSSatish Balay           workt += bs;
1331faf6580fSSatish Balay         }
1332faf6580fSSatish Balay       }
1333faf6580fSSatish Balay     }
1334faf6580fSSatish Balay   }
1335faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1336faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1337faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13382593348eSBarry Smith   return 0;
13392593348eSBarry Smith }
13402593348eSBarry Smith 
13414e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
13422593348eSBarry Smith {
1343b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13444e220ebcSLois Curfman McInnes 
13454e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
13464e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
13474e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
13484e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
13494e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
13504e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
13514e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
13524e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
13534e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
13544e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
13554e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
13564e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
13574e220ebcSLois Curfman McInnes   info->memory       = A->mem;
13584e220ebcSLois Curfman McInnes   if (A->factor) {
13594e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
13604e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
13614e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
13624e220ebcSLois Curfman McInnes   } else {
13634e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
13644e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
13654e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
13664e220ebcSLois Curfman McInnes   }
13672593348eSBarry Smith   return 0;
13682593348eSBarry Smith }
13692593348eSBarry Smith 
137091d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
137191d316f6SSatish Balay {
137291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
137391d316f6SSatish Balay 
137491d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
137591d316f6SSatish Balay 
137691d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
137791d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1378a7c10996SSatish Balay       (a->nz != b->nz)) {
137991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138091d316f6SSatish Balay   }
138191d316f6SSatish Balay 
138291d316f6SSatish Balay   /* if the a->i are the same */
138391d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
138491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138591d316f6SSatish Balay   }
138691d316f6SSatish Balay 
138791d316f6SSatish Balay   /* if a->j are the same */
138891d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
138991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
139091d316f6SSatish Balay   }
139191d316f6SSatish Balay 
139291d316f6SSatish Balay   /* if a->a are the same */
139391d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
139491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
139591d316f6SSatish Balay   }
139691d316f6SSatish Balay   *flg = PETSC_TRUE;
139791d316f6SSatish Balay   return 0;
139891d316f6SSatish Balay 
139991d316f6SSatish Balay }
140091d316f6SSatish Balay 
140191d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
140291d316f6SSatish Balay {
140391d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14047e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
140517e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
140617e48fc4SSatish Balay 
140717e48fc4SSatish Balay   bs   = a->bs;
140817e48fc4SSatish Balay   aa   = a->a;
140917e48fc4SSatish Balay   ai   = a->i;
141017e48fc4SSatish Balay   aj   = a->j;
141117e48fc4SSatish Balay   ambs = a->mbs;
14129df24120SSatish Balay   bs2  = a->bs2;
141391d316f6SSatish Balay 
141491d316f6SSatish Balay   VecSet(&zero,v);
141590f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
14169867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
141717e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
141817e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
141917e48fc4SSatish Balay       if (aj[j] == i) {
142017e48fc4SSatish Balay         row  = i*bs;
14217e67e3f9SSatish Balay         aa_j = aa+j*bs2;
14227e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
142391d316f6SSatish Balay         break;
142491d316f6SSatish Balay       }
142591d316f6SSatish Balay     }
142691d316f6SSatish Balay   }
142791d316f6SSatish Balay   return 0;
142891d316f6SSatish Balay }
142991d316f6SSatish Balay 
14309867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14319867e207SSatish Balay {
14329867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14339867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14347e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14359867e207SSatish Balay 
14369867e207SSatish Balay   ai  = a->i;
14379867e207SSatish Balay   aj  = a->j;
14389867e207SSatish Balay   aa  = a->a;
14399867e207SSatish Balay   m   = a->m;
14409867e207SSatish Balay   n   = a->n;
14419867e207SSatish Balay   bs  = a->bs;
14429867e207SSatish Balay   mbs = a->mbs;
14439df24120SSatish Balay   bs2 = a->bs2;
14449867e207SSatish Balay   if (ll) {
144590f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
14469867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14479867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14489867e207SSatish Balay       M  = ai[i+1] - ai[i];
14499867e207SSatish Balay       li = l + i*bs;
14507e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14519867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14527e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14539867e207SSatish Balay           (*v++) *= li[k%bs];
14549867e207SSatish Balay         }
14559867e207SSatish Balay       }
14569867e207SSatish Balay     }
14579867e207SSatish Balay   }
14589867e207SSatish Balay 
14599867e207SSatish Balay   if (rr) {
146090f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
14619867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14629867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14639867e207SSatish Balay       M  = ai[i+1] - ai[i];
14647e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14659867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14669867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14679867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14689867e207SSatish Balay           x = ri[k];
14699867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14709867e207SSatish Balay         }
14719867e207SSatish Balay       }
14729867e207SSatish Balay     }
14739867e207SSatish Balay   }
14749867e207SSatish Balay   return 0;
14759867e207SSatish Balay }
14769867e207SSatish Balay 
14779867e207SSatish Balay 
1478b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1479b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14802a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1481736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1482736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14831a6a6d98SBarry Smith 
14841a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14851a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14861a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14871a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14881a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14891a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14901a6a6d98SBarry Smith 
14917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14927fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14937fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14947fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14957fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14967fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14972593348eSBarry Smith 
1498b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14992593348eSBarry Smith {
1500b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15012593348eSBarry Smith   Scalar      *v = a->a;
15022593348eSBarry Smith   double      sum = 0.0;
15039df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
15042593348eSBarry Smith 
15052593348eSBarry Smith   if (type == NORM_FROBENIUS) {
15060419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
15072593348eSBarry Smith #if defined(PETSC_COMPLEX)
15082593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
15092593348eSBarry Smith #else
15102593348eSBarry Smith       sum += (*v)*(*v); v++;
15112593348eSBarry Smith #endif
15122593348eSBarry Smith     }
15132593348eSBarry Smith     *norm = sqrt(sum);
15142593348eSBarry Smith   }
15152593348eSBarry Smith   else {
1516b0267e0aSLois Curfman McInnes     SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet");
15172593348eSBarry Smith   }
15182593348eSBarry Smith   return 0;
15192593348eSBarry Smith }
15202593348eSBarry Smith 
15212593348eSBarry Smith /*
15222593348eSBarry Smith      note: This can only work for identity for row and col. It would
15232593348eSBarry Smith    be good to check this and otherwise generate an error.
15242593348eSBarry Smith */
1525b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15262593348eSBarry Smith {
1527b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15282593348eSBarry Smith   Mat         outA;
1529de6a44a3SBarry Smith   int         ierr;
15302593348eSBarry Smith 
1531b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15322593348eSBarry Smith 
15332593348eSBarry Smith   outA          = inA;
15342593348eSBarry Smith   inA->factor   = FACTOR_LU;
15352593348eSBarry Smith   a->row        = row;
15362593348eSBarry Smith   a->col        = col;
15372593348eSBarry Smith 
1538de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15392593348eSBarry Smith 
15402593348eSBarry Smith   if (!a->diag) {
1541b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15422593348eSBarry Smith   }
15437fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15442593348eSBarry Smith   return 0;
15452593348eSBarry Smith }
15462593348eSBarry Smith 
1547b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15482593348eSBarry Smith {
1549b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15509df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1551b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1552b6490206SBarry Smith   PLogFlops(totalnz);
15532593348eSBarry Smith   return 0;
15542593348eSBarry Smith }
15552593348eSBarry Smith 
15567e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15577e67e3f9SSatish Balay {
15587e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15597e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1560a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15619df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15627e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15637e67e3f9SSatish Balay 
15647e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15657e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15667e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15677e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1568a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15697e67e3f9SSatish Balay     nrow = ailen[brow];
15707e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15717e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15727e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1573a7c10996SSatish Balay       col  = in[l] ;
15747e67e3f9SSatish Balay       bcol = col/bs;
15757e67e3f9SSatish Balay       cidx = col%bs;
15767e67e3f9SSatish Balay       ridx = row%bs;
15777e67e3f9SSatish Balay       high = nrow;
15787e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15797e67e3f9SSatish Balay       while (high-low > 5) {
15807e67e3f9SSatish Balay         t = (low+high)/2;
15817e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15827e67e3f9SSatish Balay         else             low  = t;
15837e67e3f9SSatish Balay       }
15847e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15857e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15867e67e3f9SSatish Balay         if (rp[i] == bcol) {
15877e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15887e67e3f9SSatish Balay           goto finished;
15897e67e3f9SSatish Balay         }
15907e67e3f9SSatish Balay       }
15917e67e3f9SSatish Balay       *v++ = zero;
15927e67e3f9SSatish Balay       finished:;
15937e67e3f9SSatish Balay     }
15947e67e3f9SSatish Balay   }
15957e67e3f9SSatish Balay   return 0;
15967e67e3f9SSatish Balay }
15977e67e3f9SSatish Balay 
15985a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15995a838052SSatish Balay {
16005a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
16015a838052SSatish Balay   *bs = baij->bs;
16025a838052SSatish Balay   return 0;
16035a838052SSatish Balay }
16045a838052SSatish Balay 
1605d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1606d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1607d9b7c43dSSatish Balay {
1608d9b7c43dSSatish Balay   int i,row;
1609d9b7c43dSSatish Balay   row = idx[0];
1610d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1611d9b7c43dSSatish Balay 
1612d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1613d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1614d9b7c43dSSatish Balay   }
1615d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1616d9b7c43dSSatish Balay   return 0;
1617d9b7c43dSSatish Balay }
1618d9b7c43dSSatish Balay 
1619d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1620d9b7c43dSSatish Balay {
1621d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1622d9b7c43dSSatish Balay   IS          is_local;
1623d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1624d9b7c43dSSatish Balay   PetscTruth  flg;
1625d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1626d9b7c43dSSatish Balay 
1627d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1628d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1629d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1630537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1631d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1632d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1633d9b7c43dSSatish Balay 
1634d9b7c43dSSatish Balay   i = 0;
1635d9b7c43dSSatish Balay   while (i < is_n) {
1636d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1637d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1638d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1639d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1640d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1641d9b7c43dSSatish Balay       i += bs;
1642d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1643d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1644d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1645d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1646d9b7c43dSSatish Balay         aa[0] = zero;
1647d9b7c43dSSatish Balay         aa+=bs;
1648d9b7c43dSSatish Balay       }
1649d9b7c43dSSatish Balay       i++;
1650d9b7c43dSSatish Balay     }
1651d9b7c43dSSatish Balay   }
1652d9b7c43dSSatish Balay   if (diag) {
1653d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1654d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1655d9b7c43dSSatish Balay     }
1656d9b7c43dSSatish Balay   }
1657d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1658d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1659d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
16609a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1661d9b7c43dSSatish Balay 
1662d9b7c43dSSatish Balay   return 0;
1663d9b7c43dSSatish Balay }
1664ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1665ba4ca20aSSatish Balay {
1666ba4ca20aSSatish Balay   static int called = 0;
1667ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1668ba4ca20aSSatish Balay 
1669ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1670ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1671ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1672ba4ca20aSSatish Balay   return 0;
1673ba4ca20aSSatish Balay }
1674d9b7c43dSSatish Balay 
16752593348eSBarry Smith /* -------------------------------------------------------------------*/
1676cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16779867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1678f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1679faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16801a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1681b6490206SBarry Smith        0,0,
1682de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1683b6490206SBarry Smith        0,
1684f2501298SSatish Balay        MatTranspose_SeqBAIJ,
168517e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16869867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1687584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1688b6490206SBarry Smith        0,
1689d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
16907fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1691b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1692de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1693b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1694b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1695b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1696af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16977e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1698ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16993b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
17003b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
17013b2fbd54SBarry Smith        MatRestoreRowIJ_SeqBAIJ};
17022593348eSBarry Smith 
17032593348eSBarry Smith /*@C
170444cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
170544cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
170644cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
17072bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
17082bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
17092593348eSBarry Smith 
17102593348eSBarry Smith    Input Parameters:
17112593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1712b6490206SBarry Smith .  bs - size of block
17132593348eSBarry Smith .  m - number of rows
17142593348eSBarry Smith .  n - number of columns
1715b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
17162bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
17172bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
17182593348eSBarry Smith 
17192593348eSBarry Smith    Output Parameter:
17202593348eSBarry Smith .  A - the matrix
17212593348eSBarry Smith 
17222593348eSBarry Smith    Notes:
172344cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
17242593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
172544cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17262593348eSBarry Smith 
17272593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17282593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17292593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17306da5968aSLois Curfman McInnes    matrices.
17312593348eSBarry Smith 
173244cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17332593348eSBarry Smith @*/
1734b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17352593348eSBarry Smith {
17362593348eSBarry Smith   Mat         B;
1737b6490206SBarry Smith   Mat_SeqBAIJ *b;
17383b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
17393b2fbd54SBarry Smith 
17403b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
17413b2fbd54SBarry Smith   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1742b6490206SBarry Smith 
1743f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1744f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17452593348eSBarry Smith 
17462593348eSBarry Smith   *A = 0;
1747b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17482593348eSBarry Smith   PLogObjectCreate(B);
1749b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
175044cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17512593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17521a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17531a6a6d98SBarry Smith   if (!flg) {
17547fc0212eSBarry Smith     switch (bs) {
17557fc0212eSBarry Smith       case 1:
17567fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17571a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
175839b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1759f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17607fc0212eSBarry Smith        break;
17614eeb42bcSBarry Smith       case 2:
17624eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17631a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
176439b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1765f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17666d84be18SBarry Smith         break;
1767f327dd97SBarry Smith       case 3:
1768f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17691a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
177039b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1771f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17724eeb42bcSBarry Smith         break;
17736d84be18SBarry Smith       case 4:
17746d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17751a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
177639b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1777f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17786d84be18SBarry Smith         break;
17796d84be18SBarry Smith       case 5:
17806d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17811a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
178239b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1783f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17846d84be18SBarry Smith         break;
17857fc0212eSBarry Smith     }
17861a6a6d98SBarry Smith   }
1787b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1788b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17892593348eSBarry Smith   B->factor           = 0;
17902593348eSBarry Smith   B->lupivotthreshold = 1.0;
179190f02eecSBarry Smith   B->mapping          = 0;
17922593348eSBarry Smith   b->row              = 0;
17932593348eSBarry Smith   b->col              = 0;
17942593348eSBarry Smith   b->reallocs         = 0;
17952593348eSBarry Smith 
179644cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
179744cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1798b6490206SBarry Smith   b->mbs     = mbs;
1799f2501298SSatish Balay   b->nbs     = nbs;
1800b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
18012593348eSBarry Smith   if (nnz == PETSC_NULL) {
1802119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
18032593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1804b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1805b6490206SBarry Smith     nz = nz*mbs;
18062593348eSBarry Smith   }
18072593348eSBarry Smith   else {
18082593348eSBarry Smith     nz = 0;
1809b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
18102593348eSBarry Smith   }
18112593348eSBarry Smith 
18122593348eSBarry Smith   /* allocate the matrix space */
18137e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
18142593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
18157e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
18167e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
18172593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
18182593348eSBarry Smith   b->i  = b->j + nz;
18192593348eSBarry Smith   b->singlemalloc = 1;
18202593348eSBarry Smith 
1821de6a44a3SBarry Smith   b->i[0] = 0;
1822b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
18232593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
18242593348eSBarry Smith   }
18252593348eSBarry Smith 
1826b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1827b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1828b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1829b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18302593348eSBarry Smith 
1831b6490206SBarry Smith   b->bs               = bs;
18329df24120SSatish Balay   b->bs2              = bs2;
1833b6490206SBarry Smith   b->mbs              = mbs;
18342593348eSBarry Smith   b->nz               = 0;
183518e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18362593348eSBarry Smith   b->sorted           = 0;
18372593348eSBarry Smith   b->roworiented      = 1;
18382593348eSBarry Smith   b->nonew            = 0;
18392593348eSBarry Smith   b->diag             = 0;
18402593348eSBarry Smith   b->solve_work       = 0;
1841de6a44a3SBarry Smith   b->mult_work        = 0;
18422593348eSBarry Smith   b->spptr            = 0;
18434e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
18444e220ebcSLois Curfman McInnes 
18452593348eSBarry Smith   *A = B;
18462593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18472593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18482593348eSBarry Smith   return 0;
18492593348eSBarry Smith }
18502593348eSBarry Smith 
1851b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18522593348eSBarry Smith {
18532593348eSBarry Smith   Mat         C;
1854b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18559df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1856de6a44a3SBarry Smith 
1857de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18582593348eSBarry Smith 
18592593348eSBarry Smith   *B = 0;
1860b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18612593348eSBarry Smith   PLogObjectCreate(C);
1862b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18632593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1864b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1865b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18662593348eSBarry Smith   C->factor     = A->factor;
18672593348eSBarry Smith   c->row        = 0;
18682593348eSBarry Smith   c->col        = 0;
18692593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18702593348eSBarry Smith 
187144cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
187244cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
187344cd7ae7SLois Curfman McInnes   C->M          = a->m;
187444cd7ae7SLois Curfman McInnes   C->N          = a->n;
187544cd7ae7SLois Curfman McInnes 
1876b6490206SBarry Smith   c->bs         = a->bs;
18779df24120SSatish Balay   c->bs2        = a->bs2;
1878b6490206SBarry Smith   c->mbs        = a->mbs;
1879b6490206SBarry Smith   c->nbs        = a->nbs;
18802593348eSBarry Smith 
1881b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1882b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1883b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18842593348eSBarry Smith     c->imax[i] = a->imax[i];
18852593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18862593348eSBarry Smith   }
18872593348eSBarry Smith 
18882593348eSBarry Smith   /* allocate the matrix space */
18892593348eSBarry Smith   c->singlemalloc = 1;
18907e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18912593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18927e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1893de6a44a3SBarry Smith   c->i  = c->j + nz;
1894b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1895b6490206SBarry Smith   if (mbs > 0) {
1896de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18972593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18987e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18992593348eSBarry Smith     }
19002593348eSBarry Smith   }
19012593348eSBarry Smith 
1902b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
19032593348eSBarry Smith   c->sorted      = a->sorted;
19042593348eSBarry Smith   c->roworiented = a->roworiented;
19052593348eSBarry Smith   c->nonew       = a->nonew;
19062593348eSBarry Smith 
19072593348eSBarry Smith   if (a->diag) {
1908b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1909b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1910b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
19112593348eSBarry Smith       c->diag[i] = a->diag[i];
19122593348eSBarry Smith     }
19132593348eSBarry Smith   }
19142593348eSBarry Smith   else c->diag          = 0;
19152593348eSBarry Smith   c->nz                 = a->nz;
19162593348eSBarry Smith   c->maxnz              = a->maxnz;
19172593348eSBarry Smith   c->solve_work         = 0;
19182593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
19197fc0212eSBarry Smith   c->mult_work          = 0;
19202593348eSBarry Smith   *B = C;
19212593348eSBarry Smith   return 0;
19222593348eSBarry Smith }
19232593348eSBarry Smith 
192419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
19252593348eSBarry Smith {
1926b6490206SBarry Smith   Mat_SeqBAIJ  *a;
19272593348eSBarry Smith   Mat          B;
1928de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1929b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
193035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1931a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1932b6490206SBarry Smith   Scalar       *aa;
193319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19342593348eSBarry Smith 
19351a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1936de6a44a3SBarry Smith   bs2  = bs*bs;
1937b6490206SBarry Smith 
19382593348eSBarry Smith   MPI_Comm_size(comm,&size);
1939b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
194090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
194177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1942de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19432593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19442593348eSBarry Smith 
1945b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
194635aab85fSBarry Smith 
194735aab85fSBarry Smith   /*
194835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
194935aab85fSBarry Smith     divisible by the blocksize
195035aab85fSBarry Smith   */
1951b6490206SBarry Smith   mbs        = M/bs;
195235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
195335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
195435aab85fSBarry Smith   else                  mbs++;
195535aab85fSBarry Smith   if (extra_rows) {
1956537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
195735aab85fSBarry Smith   }
1958b6490206SBarry Smith 
19592593348eSBarry Smith   /* read in row lengths */
196035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
196177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
196235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19632593348eSBarry Smith 
1964b6490206SBarry Smith   /* read in column indices */
196535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
196677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
196735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1968b6490206SBarry Smith 
1969b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1970b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1971b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
197235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
197335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
197435aab85fSBarry Smith   masked = mask + mbs;
1975b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1976b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
197735aab85fSBarry Smith     nmask = 0;
1978b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1979b6490206SBarry Smith       kmax = rowlengths[rowcount];
1980b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
198135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
198235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1983b6490206SBarry Smith       }
1984b6490206SBarry Smith       rowcount++;
1985b6490206SBarry Smith     }
198635aab85fSBarry Smith     browlengths[i] += nmask;
198735aab85fSBarry Smith     /* zero out the mask elements we set */
198835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1989b6490206SBarry Smith   }
1990b6490206SBarry Smith 
19912593348eSBarry Smith   /* create our matrix */
199235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
199335aab85fSBarry Smith          CHKERRQ(ierr);
19942593348eSBarry Smith   B = *A;
1995b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19962593348eSBarry Smith 
19972593348eSBarry Smith   /* set matrix "i" values */
1998de6a44a3SBarry Smith   a->i[0] = 0;
1999b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
2000b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
2001b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
20022593348eSBarry Smith   }
2003b6490206SBarry Smith   a->nz         = 0;
2004b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
20052593348eSBarry Smith 
2006b6490206SBarry Smith   /* read in nonzero values */
200735aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
200877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
200935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2010b6490206SBarry Smith 
2011b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2012b6490206SBarry Smith   nzcount = 0; jcount = 0;
2013b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2014b6490206SBarry Smith     nzcountb = nzcount;
201535aab85fSBarry Smith     nmask    = 0;
2016b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2017b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2018b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
201935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
202035aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2021b6490206SBarry Smith       }
2022b6490206SBarry Smith       rowcount++;
2023b6490206SBarry Smith     }
2024de6a44a3SBarry Smith     /* sort the masked values */
202577c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2026de6a44a3SBarry Smith 
2027b6490206SBarry Smith     /* set "j" values into matrix */
2028b6490206SBarry Smith     maskcount = 1;
202935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
203035aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2031de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2032b6490206SBarry Smith     }
2033b6490206SBarry Smith     /* set "a" values into matrix */
2034de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2035b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2036b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2037b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2038de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2039de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2040de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2041de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2042b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2043b6490206SBarry Smith       }
2044b6490206SBarry Smith     }
204535aab85fSBarry Smith     /* zero out the mask elements we set */
204635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2047b6490206SBarry Smith   }
204835aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2049b6490206SBarry Smith 
2050b6490206SBarry Smith   PetscFree(rowlengths);
2051b6490206SBarry Smith   PetscFree(browlengths);
2052b6490206SBarry Smith   PetscFree(aa);
2053b6490206SBarry Smith   PetscFree(jj);
2054b6490206SBarry Smith   PetscFree(mask);
2055b6490206SBarry Smith 
2056b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2057de6a44a3SBarry Smith 
2058de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2059de6a44a3SBarry Smith   if (flg) {
206019bcc07fSBarry Smith     Viewer tviewer;
206119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2062639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
206319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065de6a44a3SBarry Smith   }
2066de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2067de6a44a3SBarry Smith   if (flg) {
206819bcc07fSBarry Smith     Viewer tviewer;
206919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2070639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
207119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
207219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2073de6a44a3SBarry Smith   }
2074de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2075de6a44a3SBarry Smith   if (flg) {
207619bcc07fSBarry Smith     Viewer tviewer;
207719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
207819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
207919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2080de6a44a3SBarry Smith   }
2081de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2082de6a44a3SBarry Smith   if (flg) {
208319bcc07fSBarry Smith     Viewer tviewer;
208419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2085639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
208619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
208719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2088de6a44a3SBarry Smith   }
2089de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2090de6a44a3SBarry Smith   if (flg) {
209119bcc07fSBarry Smith     Viewer tviewer;
209219bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
209319bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
209419bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
209519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2096de6a44a3SBarry Smith   }
20972593348eSBarry Smith   return 0;
20982593348eSBarry Smith }
20992593348eSBarry Smith 
21002593348eSBarry Smith 
21012593348eSBarry Smith 
2102