xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 90f02eec332fcca4c33b4e7b21cabed76bf0614e)
12593348eSBarry Smith #ifndef lint
2*90f02eecSBarry Smith static char vcid[] = "$Id: baij.c,v 1.70 1996/11/13 21:20:56 bsmith Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
19de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
20de6a44a3SBarry Smith {
21de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
23de6a44a3SBarry Smith 
24de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
267fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
27de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28de6a44a3SBarry Smith       if (a->j[j] == i) {
29de6a44a3SBarry Smith         diag[i] = j;
30de6a44a3SBarry Smith         break;
31de6a44a3SBarry Smith       }
32de6a44a3SBarry Smith     }
33de6a44a3SBarry Smith   }
34de6a44a3SBarry Smith   a->diag = diag;
35de6a44a3SBarry Smith   return 0;
36de6a44a3SBarry Smith }
372593348eSBarry Smith 
382593348eSBarry Smith #include "draw.h"
392593348eSBarry Smith #include "pinclude/pviewer.h"
4077c4ece6SBarry Smith #include "sys.h"
412593348eSBarry Smith 
423b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
433b2fbd54SBarry Smith 
443b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
463b2fbd54SBarry Smith {
473b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
483b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
493b2fbd54SBarry Smith 
503b2fbd54SBarry Smith   *nn = n;
513b2fbd54SBarry Smith   if (!ia) return 0;
523b2fbd54SBarry Smith   if (symmetric) {
533b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
543b2fbd54SBarry Smith   } else if (oshift == 1) {
553b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
563b2fbd54SBarry Smith     int nz = a->i[n] + 1;
573b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
583b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
593b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
603b2fbd54SBarry Smith   } else {
613b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
623b2fbd54SBarry Smith   }
633b2fbd54SBarry Smith 
643b2fbd54SBarry Smith   return 0;
653b2fbd54SBarry Smith }
663b2fbd54SBarry Smith 
673b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
683b2fbd54SBarry Smith                                 PetscTruth *done)
693b2fbd54SBarry Smith {
703b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
713b2fbd54SBarry Smith   int         i,n = a->mbs;
723b2fbd54SBarry Smith 
733b2fbd54SBarry Smith   if (!ia) return 0;
743b2fbd54SBarry Smith   if (symmetric) {
753b2fbd54SBarry Smith     PetscFree(*ia);
763b2fbd54SBarry Smith     PetscFree(*ja);
77af5da2bfSBarry Smith   } else if (oshift == 1) {
783b2fbd54SBarry Smith     int nz = a->i[n];
793b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
803b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
813b2fbd54SBarry Smith   }
823b2fbd54SBarry Smith   return 0;
833b2fbd54SBarry Smith }
843b2fbd54SBarry Smith 
853b2fbd54SBarry Smith 
86b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
872593348eSBarry Smith {
88b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
899df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90b6490206SBarry Smith   Scalar      *aa;
912593348eSBarry Smith 
9290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
932593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
942593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
953b2fbd54SBarry Smith 
962593348eSBarry Smith   col_lens[1] = a->m;
972593348eSBarry Smith   col_lens[2] = a->n;
987e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
992593348eSBarry Smith 
1002593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
101b6490206SBarry Smith   count = 0;
102b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
103b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
104b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105b6490206SBarry Smith     }
1062593348eSBarry Smith   }
10777c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1082593348eSBarry Smith   PetscFree(col_lens);
1092593348eSBarry Smith 
1102593348eSBarry Smith   /* store column indices (zero start index) */
1117e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112b6490206SBarry Smith   count = 0;
113b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
114b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
115b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
117b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1182593348eSBarry Smith         }
1192593348eSBarry Smith       }
120b6490206SBarry Smith     }
121b6490206SBarry Smith   }
1227e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123b6490206SBarry Smith   PetscFree(jj);
1242593348eSBarry Smith 
1252593348eSBarry Smith   /* store nonzero values */
1267e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127b6490206SBarry Smith   count = 0;
128b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
129b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
130b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1327e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
133b6490206SBarry Smith         }
134b6490206SBarry Smith       }
135b6490206SBarry Smith     }
136b6490206SBarry Smith   }
1377e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138b6490206SBarry Smith   PetscFree(aa);
1392593348eSBarry Smith   return 0;
1402593348eSBarry Smith }
1412593348eSBarry Smith 
142b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1432593348eSBarry Smith {
144b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1459df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1462593348eSBarry Smith   FILE        *fd;
1472593348eSBarry Smith   char        *outputname;
1482593348eSBarry Smith 
14990ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1502593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
15190ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
152639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1537ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1542593348eSBarry Smith   }
155639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1572593348eSBarry Smith   }
158639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
15944cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
16044cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
16144cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
16244cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
16344cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
16444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
16544cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
16644cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
16744cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
16844cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
16944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
17044cd7ae7SLois Curfman McInnes #else
17144cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
17244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
17344cd7ae7SLois Curfman McInnes #endif
17444cd7ae7SLois Curfman McInnes           }
17544cd7ae7SLois Curfman McInnes         }
17644cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
17744cd7ae7SLois Curfman McInnes       }
17844cd7ae7SLois Curfman McInnes     }
17944cd7ae7SLois Curfman McInnes   }
1802593348eSBarry Smith   else {
181b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
182b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
183b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
184b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
18688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1877e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
18888685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1897e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
19088685aaeSLois Curfman McInnes           }
19188685aaeSLois Curfman McInnes           else {
1927e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
19388685aaeSLois Curfman McInnes           }
19488685aaeSLois Curfman McInnes #else
1957e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
19688685aaeSLois Curfman McInnes #endif
1972593348eSBarry Smith           }
1982593348eSBarry Smith         }
1992593348eSBarry Smith         fprintf(fd,"\n");
2002593348eSBarry Smith       }
2012593348eSBarry Smith     }
202b6490206SBarry Smith   }
2032593348eSBarry Smith   fflush(fd);
2042593348eSBarry Smith   return 0;
2052593348eSBarry Smith }
2062593348eSBarry Smith 
2073270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2083270192aSSatish Balay {
2093270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2103270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2113270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2123270192aSSatish Balay   Scalar       *aa;
2133270192aSSatish Balay   Draw         draw;
2143270192aSSatish Balay   DrawButton   button;
2153270192aSSatish Balay   PetscTruth   isnull;
2163270192aSSatish Balay 
2173270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2183270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2193270192aSSatish Balay 
2203270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2213270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2223270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2233270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2243270192aSSatish Balay   color = DRAW_BLUE;
2253270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2263270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2273270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2283270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2293270192aSSatish Balay       aa = a->a + j*bs2;
2303270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2313270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2323270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2333270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2343270192aSSatish Balay         }
2353270192aSSatish Balay       }
2363270192aSSatish Balay     }
2373270192aSSatish Balay   }
2383270192aSSatish Balay   color = DRAW_CYAN;
2393270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2403270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2413270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2423270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2433270192aSSatish Balay       aa = a->a + j*bs2;
2443270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2453270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2463270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2473270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2483270192aSSatish Balay         }
2493270192aSSatish Balay       }
2503270192aSSatish Balay     }
2513270192aSSatish Balay   }
2523270192aSSatish Balay 
2533270192aSSatish Balay   color = DRAW_RED;
2543270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2553270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2563270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2573270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2583270192aSSatish Balay       aa = a->a + j*bs2;
2593270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2603270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2613270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2623270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2633270192aSSatish Balay         }
2643270192aSSatish Balay       }
2653270192aSSatish Balay     }
2663270192aSSatish Balay   }
2673270192aSSatish Balay 
2683270192aSSatish Balay   DrawFlush(draw);
2693270192aSSatish Balay   DrawGetPause(draw,&pause);
2703270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2713270192aSSatish Balay 
2723270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2733270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2743270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2753270192aSSatish Balay     DrawClear(draw);
2763270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2773270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2783270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2793270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2803270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2813270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2823270192aSSatish Balay     w *= scale; h *= scale;
2833270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2843270192aSSatish Balay     color = DRAW_BLUE;
2853270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2863270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2873270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2883270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
2893270192aSSatish Balay         aa = a->a + j*bs2;
2903270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
2913270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
2923270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
2933270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2943270192aSSatish Balay           }
2953270192aSSatish Balay         }
2963270192aSSatish Balay       }
2973270192aSSatish Balay     }
2983270192aSSatish Balay     color = DRAW_CYAN;
2993270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3003270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3013270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3023270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3033270192aSSatish Balay         aa = a->a + j*bs2;
3043270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3053270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3063270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3073270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3083270192aSSatish Balay           }
3093270192aSSatish Balay         }
3103270192aSSatish Balay       }
3113270192aSSatish Balay     }
3123270192aSSatish Balay 
3133270192aSSatish Balay     color = DRAW_RED;
3143270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3153270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3163270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3173270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3183270192aSSatish Balay         aa = a->a + j*bs2;
3193270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3203270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3213270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3223270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3233270192aSSatish Balay           }
3243270192aSSatish Balay         }
3253270192aSSatish Balay       }
3263270192aSSatish Balay     }
3273270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3283270192aSSatish Balay   }
3293270192aSSatish Balay   return 0;
3303270192aSSatish Balay }
3313270192aSSatish Balay 
332b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3332593348eSBarry Smith {
3342593348eSBarry Smith   Mat         A = (Mat) obj;
33519bcc07fSBarry Smith   ViewerType  vtype;
33619bcc07fSBarry Smith   int         ierr;
3372593348eSBarry Smith 
33819bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
33919bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
340b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3412593348eSBarry Smith   }
34219bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3442593348eSBarry Smith   }
34519bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
346b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3472593348eSBarry Smith   }
34819bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3493270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3502593348eSBarry Smith   }
3512593348eSBarry Smith   return 0;
3522593348eSBarry Smith }
353b6490206SBarry Smith 
354119a934aSSatish Balay #define CHUNKSIZE  10
355cd0e1443SSatish Balay 
356cd0e1443SSatish Balay /* This version has row oriented v  */
357639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
358cd0e1443SSatish Balay {
359cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
361cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
362a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3639df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
364cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
365cd0e1443SSatish Balay 
366cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
367cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3683b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
369cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
370cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
3713b2fbd54SBarry Smith #endif
372a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
373cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
374cd0e1443SSatish Balay     low = 0;
375cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3763b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
377cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
378cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
3793b2fbd54SBarry Smith #endif
380a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
381cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
382cd0e1443SSatish Balay       if (roworiented) {
383cd0e1443SSatish Balay         value = *v++;
384cd0e1443SSatish Balay       }
385cd0e1443SSatish Balay       else {
386cd0e1443SSatish Balay         value = v[k + l*m];
387cd0e1443SSatish Balay       }
388cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
389cd0e1443SSatish Balay       while (high-low > 5) {
390cd0e1443SSatish Balay         t = (low+high)/2;
391cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
392cd0e1443SSatish Balay         else              low  = t;
393cd0e1443SSatish Balay       }
394cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
395cd0e1443SSatish Balay         if (rp[i] > bcol) break;
396cd0e1443SSatish Balay         if (rp[i] == bcol) {
3977e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
398cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
399cd0e1443SSatish Balay           else                  *bap  = value;
400cd0e1443SSatish Balay           goto noinsert;
401cd0e1443SSatish Balay         }
402cd0e1443SSatish Balay       }
403cd0e1443SSatish Balay       if (nonew) goto noinsert;
404cd0e1443SSatish Balay       if (nrow >= rmax) {
405cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
406cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
407cd0e1443SSatish Balay         Scalar *new_a;
408cd0e1443SSatish Balay 
409cd0e1443SSatish Balay         /* malloc new storage space */
4107e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
411cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4127e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
413cd0e1443SSatish Balay         new_i   = new_j + new_nz;
414cd0e1443SSatish Balay 
415cd0e1443SSatish Balay         /* copy over old data into new slots */
416cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4177e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
418a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
419a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
420a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
421cd0e1443SSatish Balay                                                            len*sizeof(int));
422a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
423a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
424a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
425a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
426cd0e1443SSatish Balay         /* free up old matrix storage */
427cd0e1443SSatish Balay         PetscFree(a->a);
428cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
429cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
430cd0e1443SSatish Balay         a->singlemalloc = 1;
431cd0e1443SSatish Balay 
432a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
433cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4347e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
43518e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
436cd0e1443SSatish Balay         a->reallocs++;
437119a934aSSatish Balay         a->nz++;
438cd0e1443SSatish Balay       }
4397e67e3f9SSatish Balay       N = nrow++ - 1;
440cd0e1443SSatish Balay       /* shift up all the later entries in this row */
441cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
442cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4437e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
444cd0e1443SSatish Balay       }
4457e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
446cd0e1443SSatish Balay       rp[i]                      = bcol;
4477e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
448cd0e1443SSatish Balay       noinsert:;
449cd0e1443SSatish Balay       low = i;
450cd0e1443SSatish Balay     }
451cd0e1443SSatish Balay     ailen[brow] = nrow;
452cd0e1443SSatish Balay   }
453cd0e1443SSatish Balay   return 0;
454cd0e1443SSatish Balay }
455cd0e1443SSatish Balay 
4560b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4570b824a48SSatish Balay {
4580b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4590b824a48SSatish Balay   *m = a->m; *n = a->n;
4600b824a48SSatish Balay   return 0;
4610b824a48SSatish Balay }
4620b824a48SSatish Balay 
4630b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4640b824a48SSatish Balay {
4650b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4660b824a48SSatish Balay   *m = 0; *n = a->m;
4670b824a48SSatish Balay   return 0;
4680b824a48SSatish Balay }
4690b824a48SSatish Balay 
4709867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4719867e207SSatish Balay {
4729867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4737e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4749867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4759867e207SSatish Balay 
4769867e207SSatish Balay   bs  = a->bs;
4779867e207SSatish Balay   ai  = a->i;
4789867e207SSatish Balay   aj  = a->j;
4799867e207SSatish Balay   aa  = a->a;
4809df24120SSatish Balay   bs2 = a->bs2;
4819867e207SSatish Balay 
4829867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4839867e207SSatish Balay 
4849867e207SSatish Balay   bn  = row/bs;   /* Block number */
4859867e207SSatish Balay   bp  = row % bs; /* Block Position */
4869867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4879867e207SSatish Balay   *nz = bs*M;
4889867e207SSatish Balay 
4899867e207SSatish Balay   if (v) {
4909867e207SSatish Balay     *v = 0;
4919867e207SSatish Balay     if (*nz) {
4929867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
4939867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
4949867e207SSatish Balay         v_i  = *v + i*bs;
4957e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
4967e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
4979867e207SSatish Balay       }
4989867e207SSatish Balay     }
4999867e207SSatish Balay   }
5009867e207SSatish Balay 
5019867e207SSatish Balay   if (idx) {
5029867e207SSatish Balay     *idx = 0;
5039867e207SSatish Balay     if (*nz) {
5049867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5059867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5069867e207SSatish Balay         idx_i = *idx + i*bs;
5079867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5089867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5099867e207SSatish Balay       }
5109867e207SSatish Balay     }
5119867e207SSatish Balay   }
5129867e207SSatish Balay   return 0;
5139867e207SSatish Balay }
5149867e207SSatish Balay 
5159867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5169867e207SSatish Balay {
5179867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5189867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5199867e207SSatish Balay   return 0;
5209867e207SSatish Balay }
521b6490206SBarry Smith 
522f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
523f2501298SSatish Balay {
524f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
525f2501298SSatish Balay   Mat         C;
526f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5279df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
528f2501298SSatish Balay   Scalar      *array=a->a;
529f2501298SSatish Balay 
530f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
531f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
532f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
533f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
534a7c10996SSatish Balay 
535a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
536f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
537f2501298SSatish Balay   PetscFree(col);
538f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
539f2501298SSatish Balay   cols = rows + bs;
540f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
541f2501298SSatish Balay     cols[0] = i*bs;
542f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
543f2501298SSatish Balay     len = ai[i+1] - ai[i];
544f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
545f2501298SSatish Balay       rows[0] = (*aj++)*bs;
546f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
547f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
548f2501298SSatish Balay       array += bs2;
549f2501298SSatish Balay     }
550f2501298SSatish Balay   }
5511073c447SSatish Balay   PetscFree(rows);
552f2501298SSatish Balay 
5536d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5546d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555f2501298SSatish Balay 
556f2501298SSatish Balay   if (B != PETSC_NULL) {
557f2501298SSatish Balay     *B = C;
558f2501298SSatish Balay   } else {
559f2501298SSatish Balay     /* This isn't really an in-place transpose */
560f2501298SSatish Balay     PetscFree(a->a);
561f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
562f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
563f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
564f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
565f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
566f2501298SSatish Balay     PetscFree(a);
567f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
568f2501298SSatish Balay     PetscHeaderDestroy(C);
569f2501298SSatish Balay   }
570f2501298SSatish Balay   return 0;
571f2501298SSatish Balay }
572f2501298SSatish Balay 
573f2501298SSatish Balay 
574584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
575584200bdSSatish Balay {
576584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
577584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
578a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5799df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
580584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
581584200bdSSatish Balay 
5826d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
583584200bdSSatish Balay 
584584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
585584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
586584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
587584200bdSSatish Balay     if (fshift) {
588a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
589584200bdSSatish Balay       N = ailen[i];
590584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
591584200bdSSatish Balay         ip[j-fshift] = ip[j];
5927e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
593584200bdSSatish Balay       }
594584200bdSSatish Balay     }
595584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
596584200bdSSatish Balay   }
597584200bdSSatish Balay   if (mbs) {
598584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
599584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
600584200bdSSatish Balay   }
601584200bdSSatish Balay   /* reset ilen and imax for each row */
602584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
603584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
604584200bdSSatish Balay   }
605a7c10996SSatish Balay   a->nz = ai[mbs];
606584200bdSSatish Balay 
607584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
608584200bdSSatish Balay   if (fshift && a->diag) {
609584200bdSSatish Balay     PetscFree(a->diag);
610584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
611584200bdSSatish Balay     a->diag = 0;
612584200bdSSatish Balay   }
613537820f0SBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
614537820f0SBarry Smith            fshift*bs2,a->nz*bs2,m,a->bs);
615584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
616584200bdSSatish Balay            a->reallocs);
6174e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
6184e220ebcSLois Curfman McInnes 
619584200bdSSatish Balay   return 0;
620584200bdSSatish Balay }
621584200bdSSatish Balay 
622b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6232593348eSBarry Smith {
624b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6259df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6262593348eSBarry Smith   return 0;
6272593348eSBarry Smith }
6282593348eSBarry Smith 
629b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6302593348eSBarry Smith {
6312593348eSBarry Smith   Mat         A  = (Mat) obj;
632b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
633*90f02eecSBarry Smith   int         ierr;
6342593348eSBarry Smith 
6352593348eSBarry Smith #if defined(PETSC_LOG)
6362593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6372593348eSBarry Smith #endif
6382593348eSBarry Smith   PetscFree(a->a);
6392593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6402593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6412593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6422593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6432593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
644de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6452593348eSBarry Smith   PetscFree(a);
646*90f02eecSBarry Smith   if (A->mapping) {
647*90f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
648*90f02eecSBarry Smith   }
649f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
650f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6512593348eSBarry Smith   return 0;
6522593348eSBarry Smith }
6532593348eSBarry Smith 
654b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6552593348eSBarry Smith {
656b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6576d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6586d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6596d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
6606d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6616d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6626d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
6636d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6646d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
665*90f02eecSBarry Smith            op == MAT_YES_NEW_DIAGONALS ||
666*90f02eecSBarry Smith            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
66794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6686d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6696d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6702593348eSBarry Smith   else
671b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6722593348eSBarry Smith   return 0;
6732593348eSBarry Smith }
6742593348eSBarry Smith 
6752593348eSBarry Smith 
6762593348eSBarry Smith /* -------------------------------------------------------*/
6772593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6782593348eSBarry Smith /* -------------------------------------------------------*/
679b6490206SBarry Smith #include "pinclude/plapack.h"
680b6490206SBarry Smith 
68139b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6822593348eSBarry Smith {
683b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
68439b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
685c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
6862593348eSBarry Smith 
687c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
688c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
689b6490206SBarry Smith 
690119a934aSSatish Balay   idx   = a->j;
691119a934aSSatish Balay   v     = a->a;
692119a934aSSatish Balay   ii    = a->i;
693119a934aSSatish Balay 
694119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
695119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
696119a934aSSatish Balay     sum  = 0.0;
697119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
6981a6a6d98SBarry Smith     z[i] = sum;
699119a934aSSatish Balay   }
700c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
701c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
70239b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
70339b95ed1SBarry Smith   return 0;
70439b95ed1SBarry Smith }
70539b95ed1SBarry Smith 
70639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
70739b95ed1SBarry Smith {
70839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
70939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
71039b95ed1SBarry Smith   register Scalar x1,x2;
71139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
712c16cb8f2SBarry Smith   int             j,n;
71339b95ed1SBarry Smith 
714c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
715c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
71639b95ed1SBarry Smith 
71739b95ed1SBarry Smith   idx   = a->j;
71839b95ed1SBarry Smith   v     = a->a;
71939b95ed1SBarry Smith   ii    = a->i;
72039b95ed1SBarry Smith 
721119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
722119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
723119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
724119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
725119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
727119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
728119a934aSSatish Balay       v += 4;
729119a934aSSatish Balay     }
7301a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
731119a934aSSatish Balay     z += 2;
732119a934aSSatish Balay   }
733c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
734c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
73539b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
73639b95ed1SBarry Smith   return 0;
73739b95ed1SBarry Smith }
73839b95ed1SBarry Smith 
73939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
74039b95ed1SBarry Smith {
74139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
74239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
74439b95ed1SBarry Smith 
745c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
746c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
74739b95ed1SBarry Smith 
74839b95ed1SBarry Smith   idx   = a->j;
74939b95ed1SBarry Smith   v     = a->a;
75039b95ed1SBarry Smith   ii    = a->i;
75139b95ed1SBarry Smith 
752119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
753119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
754119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
756119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760119a934aSSatish Balay       v += 9;
761119a934aSSatish Balay     }
7621a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763119a934aSSatish Balay     z += 3;
764119a934aSSatish Balay   }
765c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
766c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
76739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
76839b95ed1SBarry Smith   return 0;
76939b95ed1SBarry Smith }
77039b95ed1SBarry Smith 
77139b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
77239b95ed1SBarry Smith {
77339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
77639b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
777c16cb8f2SBarry Smith   int             j,n;
77839b95ed1SBarry Smith 
779c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
780c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
78139b95ed1SBarry Smith 
78239b95ed1SBarry Smith   idx   = a->j;
78339b95ed1SBarry Smith   v     = a->a;
78439b95ed1SBarry Smith   ii    = a->i;
78539b95ed1SBarry Smith 
786119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
787119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
788119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
790119a934aSSatish Balay       xb = x + 4*(*idx++);
791119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796119a934aSSatish Balay       v += 16;
797119a934aSSatish Balay     }
7981a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799119a934aSSatish Balay     z += 4;
800119a934aSSatish Balay   }
801c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
802c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
80339b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80439b95ed1SBarry Smith   return 0;
80539b95ed1SBarry Smith }
80639b95ed1SBarry Smith 
80739b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
80839b95ed1SBarry Smith {
80939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
81039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
81139b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
812c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
81339b95ed1SBarry Smith 
814c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
815c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
81639b95ed1SBarry Smith 
81739b95ed1SBarry Smith   idx   = a->j;
81839b95ed1SBarry Smith   v     = a->a;
81939b95ed1SBarry Smith   ii    = a->i;
82039b95ed1SBarry Smith 
821119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
822119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
823119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
825119a934aSSatish Balay       xb = x + 5*(*idx++);
826119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832119a934aSSatish Balay       v += 25;
833119a934aSSatish Balay     }
8341a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835119a934aSSatish Balay     z += 5;
836119a934aSSatish Balay   }
837c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
838c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
83939b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
84039b95ed1SBarry Smith   return 0;
84139b95ed1SBarry Smith }
84239b95ed1SBarry Smith 
84339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84439b95ed1SBarry Smith {
84539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
84639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
847c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
849c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
85039b95ed1SBarry Smith 
851c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
852c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
85339b95ed1SBarry Smith 
85439b95ed1SBarry Smith   idx   = a->j;
85539b95ed1SBarry Smith   v     = a->a;
85639b95ed1SBarry Smith   ii    = a->i;
85739b95ed1SBarry Smith 
85839b95ed1SBarry Smith 
859119a934aSSatish Balay   if (!a->mult_work) {
8603b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8612b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862119a934aSSatish Balay   }
863119a934aSSatish Balay   work = a->mult_work;
864119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
865119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
866119a934aSSatish Balay     ncols = n*bs;
867119a934aSSatish Balay     workt = work;
868119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
869119a934aSSatish Balay       xb = x + bs*(*idx++);
870119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871119a934aSSatish Balay       workt += bs;
872119a934aSSatish Balay     }
8731a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874119a934aSSatish Balay     v += n*bs2;
875119a934aSSatish Balay     z += bs;
876119a934aSSatish Balay   }
877c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
878c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
8791a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
880bb42667fSSatish Balay   return 0;
881bb42667fSSatish Balay }
882bb42667fSSatish Balay 
883f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884f44d3a6dSSatish Balay {
885f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
887c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
888f44d3a6dSSatish Balay 
889c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
890c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
891c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
892f44d3a6dSSatish Balay 
893f44d3a6dSSatish Balay   idx   = a->j;
894f44d3a6dSSatish Balay   v     = a->a;
895f44d3a6dSSatish Balay   ii    = a->i;
896f44d3a6dSSatish Balay 
897f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
898f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
899f44d3a6dSSatish Balay     sum  = y[i];
900f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
901f44d3a6dSSatish Balay     z[i] = sum;
902f44d3a6dSSatish Balay   }
903c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
904c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
905c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
906f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
907f44d3a6dSSatish Balay   return 0;
908f44d3a6dSSatish Balay }
909f44d3a6dSSatish Balay 
910f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911f44d3a6dSSatish Balay {
912f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914f44d3a6dSSatish Balay   register Scalar x1,x2;
915f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
916c16cb8f2SBarry Smith   int             j,n;
917f44d3a6dSSatish Balay 
918c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
919c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
920c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
921f44d3a6dSSatish Balay 
922f44d3a6dSSatish Balay   idx   = a->j;
923f44d3a6dSSatish Balay   v     = a->a;
924f44d3a6dSSatish Balay   ii    = a->i;
925f44d3a6dSSatish Balay 
926f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
927f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
928f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
929f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
930f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
932f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
933f44d3a6dSSatish Balay       v += 4;
934f44d3a6dSSatish Balay     }
935f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
936f44d3a6dSSatish Balay     z += 2; y += 2;
937f44d3a6dSSatish Balay   }
938c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
939c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
940c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
941f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
942f44d3a6dSSatish Balay   return 0;
943f44d3a6dSSatish Balay }
944f44d3a6dSSatish Balay 
945f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946f44d3a6dSSatish Balay {
947f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
950f44d3a6dSSatish Balay 
951c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
952c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
953c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
954f44d3a6dSSatish Balay 
955f44d3a6dSSatish Balay   idx   = a->j;
956f44d3a6dSSatish Balay   v     = a->a;
957f44d3a6dSSatish Balay   ii    = a->i;
958f44d3a6dSSatish Balay 
959f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
960f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
961f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
963f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967f44d3a6dSSatish Balay       v += 9;
968f44d3a6dSSatish Balay     }
969f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970f44d3a6dSSatish Balay     z += 3; y += 3;
971f44d3a6dSSatish Balay   }
972c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
973c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
974c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
975f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
976f44d3a6dSSatish Balay   return 0;
977f44d3a6dSSatish Balay }
978f44d3a6dSSatish Balay 
979f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980f44d3a6dSSatish Balay {
981f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
984f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
985c16cb8f2SBarry Smith   int             j,n;
986f44d3a6dSSatish Balay 
987c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
988c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
989c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
990f44d3a6dSSatish Balay 
991f44d3a6dSSatish Balay   idx   = a->j;
992f44d3a6dSSatish Balay   v     = a->a;
993f44d3a6dSSatish Balay   ii    = a->i;
994f44d3a6dSSatish Balay 
995f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
996f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
997f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
999f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1000f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005f44d3a6dSSatish Balay       v += 16;
1006f44d3a6dSSatish Balay     }
1007f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008f44d3a6dSSatish Balay     z += 4; y += 4;
1009f44d3a6dSSatish Balay   }
1010c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1011c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1012c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1013f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1014f44d3a6dSSatish Balay   return 0;
1015f44d3a6dSSatish Balay }
1016f44d3a6dSSatish Balay 
1017f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018f44d3a6dSSatish Balay {
1019f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1022c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023f44d3a6dSSatish Balay 
1024c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1025c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1026c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1027f44d3a6dSSatish Balay 
1028f44d3a6dSSatish Balay   idx   = a->j;
1029f44d3a6dSSatish Balay   v     = a->a;
1030f44d3a6dSSatish Balay   ii    = a->i;
1031f44d3a6dSSatish Balay 
1032f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1033f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1034f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1036f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1037f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043f44d3a6dSSatish Balay       v += 25;
1044f44d3a6dSSatish Balay     }
1045f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046f44d3a6dSSatish Balay     z += 5; y += 5;
1047f44d3a6dSSatish Balay   }
1048c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1049c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1050c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1051f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1052f44d3a6dSSatish Balay   return 0;
1053f44d3a6dSSatish Balay }
1054f44d3a6dSSatish Balay 
1055f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056f44d3a6dSSatish Balay {
1057f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1059f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061f44d3a6dSSatish Balay 
1062f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063f44d3a6dSSatish Balay 
1064c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1065c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1066f44d3a6dSSatish Balay 
1067f44d3a6dSSatish Balay   idx   = a->j;
1068f44d3a6dSSatish Balay   v     = a->a;
1069f44d3a6dSSatish Balay   ii    = a->i;
1070f44d3a6dSSatish Balay 
1071f44d3a6dSSatish Balay 
1072f44d3a6dSSatish Balay   if (!a->mult_work) {
1073f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1074f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075f44d3a6dSSatish Balay   }
1076f44d3a6dSSatish Balay   work = a->mult_work;
1077f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1078f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1079f44d3a6dSSatish Balay     ncols = n*bs;
1080f44d3a6dSSatish Balay     workt = work;
1081f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1082f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1083f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084f44d3a6dSSatish Balay       workt += bs;
1085f44d3a6dSSatish Balay     }
1086f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087f44d3a6dSSatish Balay     v += n*bs2;
1088f44d3a6dSSatish Balay     z += bs;
1089f44d3a6dSSatish Balay   }
1090c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1091c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1092f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1093f44d3a6dSSatish Balay   return 0;
1094f44d3a6dSSatish Balay }
1095f44d3a6dSSatish Balay 
10961a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097bb42667fSSatish Balay {
1098bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
10991a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1100bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11029df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103119a934aSSatish Balay 
1104119a934aSSatish Balay 
1105*90f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
1106*90f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1107bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1108bb42667fSSatish Balay 
1109119a934aSSatish Balay   idx   = a->j;
1110119a934aSSatish Balay   v     = a->a;
1111119a934aSSatish Balay   ii    = a->i;
1112119a934aSSatish Balay 
1113119a934aSSatish Balay   switch (bs) {
1114119a934aSSatish Balay   case 1:
1115119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1116119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1117119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1118119a934aSSatish Balay       ib = idx + ai[i];
1119119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1120bb1453f0SSatish Balay         rval    = ib[j];
1121bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1122119a934aSSatish Balay       }
1123119a934aSSatish Balay     }
1124119a934aSSatish Balay     break;
1125119a934aSSatish Balay   case 2:
1126119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1127119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1128119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129119a934aSSatish Balay       ib = idx + ai[i];
1130119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1131119a934aSSatish Balay         rval      = ib[j]*2;
1132bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1133bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1134119a934aSSatish Balay         v += 4;
1135119a934aSSatish Balay       }
1136119a934aSSatish Balay     }
1137119a934aSSatish Balay     break;
1138119a934aSSatish Balay   case 3:
1139119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1140119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1141119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142119a934aSSatish Balay       ib = idx + ai[i];
1143119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1144119a934aSSatish Balay         rval      = ib[j]*3;
1145bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148119a934aSSatish Balay         v += 9;
1149119a934aSSatish Balay       }
1150119a934aSSatish Balay     }
1151119a934aSSatish Balay     break;
1152119a934aSSatish Balay   case 4:
1153119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1154119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1155119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156119a934aSSatish Balay       ib = idx + ai[i];
1157119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1158119a934aSSatish Balay         rval      = ib[j]*4;
1159bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163119a934aSSatish Balay         v += 16;
1164119a934aSSatish Balay       }
1165119a934aSSatish Balay     }
1166119a934aSSatish Balay     break;
1167119a934aSSatish Balay   case 5:
1168119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1169119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1170119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1172119a934aSSatish Balay       ib = idx + ai[i];
1173119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1174119a934aSSatish Balay         rval      = ib[j]*5;
1175bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180119a934aSSatish Balay         v += 25;
1181119a934aSSatish Balay       }
1182119a934aSSatish Balay     }
1183119a934aSSatish Balay     break;
1184119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1185119a934aSSatish Balay     default: {
1186119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187119a934aSSatish Balay       if (!a->mult_work) {
11883b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1189bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1191119a934aSSatish Balay       }
1192119a934aSSatish Balay       work = a->mult_work;
1193119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1194119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1195119a934aSSatish Balay         ncols = n*bs;
1196119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1197119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198119a934aSSatish Balay         v += n*bs2;
1199119a934aSSatish Balay         x += bs;
1200119a934aSSatish Balay         workt = work;
1201119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1202119a934aSSatish Balay           zb = z + bs*(*idx++);
1203bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204119a934aSSatish Balay           workt += bs;
1205119a934aSSatish Balay         }
1206119a934aSSatish Balay       }
1207119a934aSSatish Balay     }
1208119a934aSSatish Balay   }
12090419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12100419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1212faf6580fSSatish Balay   return 0;
1213faf6580fSSatish Balay }
1214faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215faf6580fSSatish Balay {
1216faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1218faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221faf6580fSSatish Balay 
1222faf6580fSSatish Balay 
1223faf6580fSSatish Balay 
1224*90f02eecSBarry Smith   VecGetArray_Fast(xx,xg); x = xg;
1225*90f02eecSBarry Smith   VecGetArray_Fast(zz,zg); z = zg;
1226faf6580fSSatish Balay 
1227648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1229648c1d95SSatish Balay 
1230faf6580fSSatish Balay 
1231faf6580fSSatish Balay   idx   = a->j;
1232faf6580fSSatish Balay   v     = a->a;
1233faf6580fSSatish Balay   ii    = a->i;
1234faf6580fSSatish Balay 
1235faf6580fSSatish Balay   switch (bs) {
1236faf6580fSSatish Balay   case 1:
1237faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1238faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1239faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1240faf6580fSSatish Balay       ib = idx + ai[i];
1241faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1242faf6580fSSatish Balay         rval    = ib[j];
1243faf6580fSSatish Balay         z[rval] += *v++ * x1;
1244faf6580fSSatish Balay       }
1245faf6580fSSatish Balay     }
1246faf6580fSSatish Balay     break;
1247faf6580fSSatish Balay   case 2:
1248faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1249faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1250faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251faf6580fSSatish Balay       ib = idx + ai[i];
1252faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1253faf6580fSSatish Balay         rval      = ib[j]*2;
1254faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1255faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1256faf6580fSSatish Balay         v += 4;
1257faf6580fSSatish Balay       }
1258faf6580fSSatish Balay     }
1259faf6580fSSatish Balay     break;
1260faf6580fSSatish Balay   case 3:
1261faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1262faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1263faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264faf6580fSSatish Balay       ib = idx + ai[i];
1265faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1266faf6580fSSatish Balay         rval      = ib[j]*3;
1267faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270faf6580fSSatish Balay         v += 9;
1271faf6580fSSatish Balay       }
1272faf6580fSSatish Balay     }
1273faf6580fSSatish Balay     break;
1274faf6580fSSatish Balay   case 4:
1275faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1276faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1277faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278faf6580fSSatish Balay       ib = idx + ai[i];
1279faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1280faf6580fSSatish Balay         rval      = ib[j]*4;
1281faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285faf6580fSSatish Balay         v += 16;
1286faf6580fSSatish Balay       }
1287faf6580fSSatish Balay     }
1288faf6580fSSatish Balay     break;
1289faf6580fSSatish Balay   case 5:
1290faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1291faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1292faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1294faf6580fSSatish Balay       ib = idx + ai[i];
1295faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1296faf6580fSSatish Balay         rval      = ib[j]*5;
1297faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302faf6580fSSatish Balay         v += 25;
1303faf6580fSSatish Balay       }
1304faf6580fSSatish Balay     }
1305faf6580fSSatish Balay     break;
1306faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1307faf6580fSSatish Balay     default: {
1308faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309faf6580fSSatish Balay       if (!a->mult_work) {
1310faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1311faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1313faf6580fSSatish Balay       }
1314faf6580fSSatish Balay       work = a->mult_work;
1315faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1316faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1317faf6580fSSatish Balay         ncols = n*bs;
1318faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1319faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320faf6580fSSatish Balay         v += n*bs2;
1321faf6580fSSatish Balay         x += bs;
1322faf6580fSSatish Balay         workt = work;
1323faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1324faf6580fSSatish Balay           zb = z + bs*(*idx++);
1325faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326faf6580fSSatish Balay           workt += bs;
1327faf6580fSSatish Balay         }
1328faf6580fSSatish Balay       }
1329faf6580fSSatish Balay     }
1330faf6580fSSatish Balay   }
1331faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13342593348eSBarry Smith   return 0;
13352593348eSBarry Smith }
13362593348eSBarry Smith 
13374e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
13382593348eSBarry Smith {
1339b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13404e220ebcSLois Curfman McInnes 
13414e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
13424e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
13434e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
13444e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
13454e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
13464e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
13474e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
13484e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
13494e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
13504e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
13514e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
13524e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
13534e220ebcSLois Curfman McInnes   info->memory       = A->mem;
13544e220ebcSLois Curfman McInnes   if (A->factor) {
13554e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
13564e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
13574e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
13584e220ebcSLois Curfman McInnes   } else {
13594e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
13604e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
13614e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
13624e220ebcSLois Curfman McInnes   }
13632593348eSBarry Smith   return 0;
13642593348eSBarry Smith }
13652593348eSBarry Smith 
136691d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
136791d316f6SSatish Balay {
136891d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
136991d316f6SSatish Balay 
137091d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
137191d316f6SSatish Balay 
137291d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
137391d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1374a7c10996SSatish Balay       (a->nz != b->nz)) {
137591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137691d316f6SSatish Balay   }
137791d316f6SSatish Balay 
137891d316f6SSatish Balay   /* if the a->i are the same */
137991d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
138091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138191d316f6SSatish Balay   }
138291d316f6SSatish Balay 
138391d316f6SSatish Balay   /* if a->j are the same */
138491d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
138591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138691d316f6SSatish Balay   }
138791d316f6SSatish Balay 
138891d316f6SSatish Balay   /* if a->a are the same */
138991d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
139091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
139191d316f6SSatish Balay   }
139291d316f6SSatish Balay   *flg = PETSC_TRUE;
139391d316f6SSatish Balay   return 0;
139491d316f6SSatish Balay 
139591d316f6SSatish Balay }
139691d316f6SSatish Balay 
139791d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
139891d316f6SSatish Balay {
139991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14007e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
140117e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
140217e48fc4SSatish Balay 
140317e48fc4SSatish Balay   bs   = a->bs;
140417e48fc4SSatish Balay   aa   = a->a;
140517e48fc4SSatish Balay   ai   = a->i;
140617e48fc4SSatish Balay   aj   = a->j;
140717e48fc4SSatish Balay   ambs = a->mbs;
14089df24120SSatish Balay   bs2  = a->bs2;
140991d316f6SSatish Balay 
141091d316f6SSatish Balay   VecSet(&zero,v);
1411*90f02eecSBarry Smith   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
14129867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
141317e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
141417e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
141517e48fc4SSatish Balay       if (aj[j] == i) {
141617e48fc4SSatish Balay         row  = i*bs;
14177e67e3f9SSatish Balay         aa_j = aa+j*bs2;
14187e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
141991d316f6SSatish Balay         break;
142091d316f6SSatish Balay       }
142191d316f6SSatish Balay     }
142291d316f6SSatish Balay   }
142391d316f6SSatish Balay   return 0;
142491d316f6SSatish Balay }
142591d316f6SSatish Balay 
14269867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14279867e207SSatish Balay {
14289867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14299867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14307e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14319867e207SSatish Balay 
14329867e207SSatish Balay   ai  = a->i;
14339867e207SSatish Balay   aj  = a->j;
14349867e207SSatish Balay   aa  = a->a;
14359867e207SSatish Balay   m   = a->m;
14369867e207SSatish Balay   n   = a->n;
14379867e207SSatish Balay   bs  = a->bs;
14389867e207SSatish Balay   mbs = a->mbs;
14399df24120SSatish Balay   bs2 = a->bs2;
14409867e207SSatish Balay   if (ll) {
1441*90f02eecSBarry Smith     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
14429867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14439867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14449867e207SSatish Balay       M  = ai[i+1] - ai[i];
14459867e207SSatish Balay       li = l + i*bs;
14467e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14479867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14487e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14499867e207SSatish Balay           (*v++) *= li[k%bs];
14509867e207SSatish Balay         }
14519867e207SSatish Balay       }
14529867e207SSatish Balay     }
14539867e207SSatish Balay   }
14549867e207SSatish Balay 
14559867e207SSatish Balay   if (rr) {
1456*90f02eecSBarry Smith     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
14579867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14589867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14599867e207SSatish Balay       M  = ai[i+1] - ai[i];
14607e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14619867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14629867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14639867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14649867e207SSatish Balay           x = ri[k];
14659867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14669867e207SSatish Balay         }
14679867e207SSatish Balay       }
14689867e207SSatish Balay     }
14699867e207SSatish Balay   }
14709867e207SSatish Balay   return 0;
14719867e207SSatish Balay }
14729867e207SSatish Balay 
14739867e207SSatish Balay 
1474b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1475b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14762a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1477736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1478736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14791a6a6d98SBarry Smith 
14801a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14811a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14821a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14831a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14841a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14851a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14861a6a6d98SBarry Smith 
14877fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14887fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14897fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14907fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14927fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14932593348eSBarry Smith 
1494b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14952593348eSBarry Smith {
1496b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14972593348eSBarry Smith   Scalar      *v = a->a;
14982593348eSBarry Smith   double      sum = 0.0;
14999df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
15002593348eSBarry Smith 
15012593348eSBarry Smith   if (type == NORM_FROBENIUS) {
15020419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
15032593348eSBarry Smith #if defined(PETSC_COMPLEX)
15042593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
15052593348eSBarry Smith #else
15062593348eSBarry Smith       sum += (*v)*(*v); v++;
15072593348eSBarry Smith #endif
15082593348eSBarry Smith     }
15092593348eSBarry Smith     *norm = sqrt(sum);
15102593348eSBarry Smith   }
15112593348eSBarry Smith   else {
1512b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
15132593348eSBarry Smith   }
15142593348eSBarry Smith   return 0;
15152593348eSBarry Smith }
15162593348eSBarry Smith 
15172593348eSBarry Smith /*
15182593348eSBarry Smith      note: This can only work for identity for row and col. It would
15192593348eSBarry Smith    be good to check this and otherwise generate an error.
15202593348eSBarry Smith */
1521b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15222593348eSBarry Smith {
1523b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15242593348eSBarry Smith   Mat         outA;
1525de6a44a3SBarry Smith   int         ierr;
15262593348eSBarry Smith 
1527b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15282593348eSBarry Smith 
15292593348eSBarry Smith   outA          = inA;
15302593348eSBarry Smith   inA->factor   = FACTOR_LU;
15312593348eSBarry Smith   a->row        = row;
15322593348eSBarry Smith   a->col        = col;
15332593348eSBarry Smith 
1534de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15352593348eSBarry Smith 
15362593348eSBarry Smith   if (!a->diag) {
1537b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15382593348eSBarry Smith   }
15397fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15402593348eSBarry Smith   return 0;
15412593348eSBarry Smith }
15422593348eSBarry Smith 
1543b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15442593348eSBarry Smith {
1545b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15469df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1547b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1548b6490206SBarry Smith   PLogFlops(totalnz);
15492593348eSBarry Smith   return 0;
15502593348eSBarry Smith }
15512593348eSBarry Smith 
15527e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15537e67e3f9SSatish Balay {
15547e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15557e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1556a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15579df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15587e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15597e67e3f9SSatish Balay 
15607e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15617e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15627e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15637e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1564a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15657e67e3f9SSatish Balay     nrow = ailen[brow];
15667e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15677e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15687e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1569a7c10996SSatish Balay       col  = in[l] ;
15707e67e3f9SSatish Balay       bcol = col/bs;
15717e67e3f9SSatish Balay       cidx = col%bs;
15727e67e3f9SSatish Balay       ridx = row%bs;
15737e67e3f9SSatish Balay       high = nrow;
15747e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15757e67e3f9SSatish Balay       while (high-low > 5) {
15767e67e3f9SSatish Balay         t = (low+high)/2;
15777e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15787e67e3f9SSatish Balay         else             low  = t;
15797e67e3f9SSatish Balay       }
15807e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15817e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15827e67e3f9SSatish Balay         if (rp[i] == bcol) {
15837e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15847e67e3f9SSatish Balay           goto finished;
15857e67e3f9SSatish Balay         }
15867e67e3f9SSatish Balay       }
15877e67e3f9SSatish Balay       *v++ = zero;
15887e67e3f9SSatish Balay       finished:;
15897e67e3f9SSatish Balay     }
15907e67e3f9SSatish Balay   }
15917e67e3f9SSatish Balay   return 0;
15927e67e3f9SSatish Balay }
15937e67e3f9SSatish Balay 
15945a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15955a838052SSatish Balay {
15965a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
15975a838052SSatish Balay   *bs = baij->bs;
15985a838052SSatish Balay   return 0;
15995a838052SSatish Balay }
16005a838052SSatish Balay 
1601d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1602d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1603d9b7c43dSSatish Balay {
1604d9b7c43dSSatish Balay   int i,row;
1605d9b7c43dSSatish Balay   row = idx[0];
1606d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1607d9b7c43dSSatish Balay 
1608d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1609d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1610d9b7c43dSSatish Balay   }
1611d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1612d9b7c43dSSatish Balay   return 0;
1613d9b7c43dSSatish Balay }
1614d9b7c43dSSatish Balay 
1615d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1616d9b7c43dSSatish Balay {
1617d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1618d9b7c43dSSatish Balay   IS          is_local;
1619d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1620d9b7c43dSSatish Balay   PetscTruth  flg;
1621d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1622d9b7c43dSSatish Balay 
1623d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1624d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1625d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1626537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1627d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1628d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1629d9b7c43dSSatish Balay 
1630d9b7c43dSSatish Balay   i = 0;
1631d9b7c43dSSatish Balay   while (i < is_n) {
1632d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1633d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1634d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1635d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1636d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1637d9b7c43dSSatish Balay       i += bs;
1638d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1639d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1640d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1641d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1642d9b7c43dSSatish Balay         aa[0] = zero;
1643d9b7c43dSSatish Balay         aa+=bs;
1644d9b7c43dSSatish Balay       }
1645d9b7c43dSSatish Balay       i++;
1646d9b7c43dSSatish Balay     }
1647d9b7c43dSSatish Balay   }
1648d9b7c43dSSatish Balay   if (diag) {
1649d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1650d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1651d9b7c43dSSatish Balay     }
1652d9b7c43dSSatish Balay   }
1653d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1654d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1655d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1656d9b7c43dSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1657d9b7c43dSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1658d9b7c43dSSatish Balay 
1659d9b7c43dSSatish Balay   return 0;
1660d9b7c43dSSatish Balay }
1661ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1662ba4ca20aSSatish Balay {
1663ba4ca20aSSatish Balay   static int called = 0;
1664ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1665ba4ca20aSSatish Balay 
1666ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1667ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1668ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1669ba4ca20aSSatish Balay   return 0;
1670ba4ca20aSSatish Balay }
1671d9b7c43dSSatish Balay 
16722593348eSBarry Smith /* -------------------------------------------------------------------*/
1673cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16749867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1675f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1676faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16771a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1678b6490206SBarry Smith        0,0,
1679de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1680b6490206SBarry Smith        0,
1681f2501298SSatish Balay        MatTranspose_SeqBAIJ,
168217e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16839867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1684584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1685b6490206SBarry Smith        0,
1686d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
16877fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1688b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1689de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1690b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1691b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1692b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1693af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16947e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1695ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16963b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
16973b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
16983b2fbd54SBarry Smith        MatRestoreRowIJ_SeqBAIJ};
16992593348eSBarry Smith 
17002593348eSBarry Smith /*@C
170144cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
170244cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
170344cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
17042bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
17052bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
17062593348eSBarry Smith 
17072593348eSBarry Smith    Input Parameters:
17082593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1709b6490206SBarry Smith .  bs - size of block
17102593348eSBarry Smith .  m - number of rows
17112593348eSBarry Smith .  n - number of columns
1712b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
17132bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
17142bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
17152593348eSBarry Smith 
17162593348eSBarry Smith    Output Parameter:
17172593348eSBarry Smith .  A - the matrix
17182593348eSBarry Smith 
17192593348eSBarry Smith    Notes:
172044cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
17212593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
172244cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17232593348eSBarry Smith 
17242593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17252593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17262593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17272593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
17282593348eSBarry Smith 
172944cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17302593348eSBarry Smith @*/
1731b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17322593348eSBarry Smith {
17332593348eSBarry Smith   Mat         B;
1734b6490206SBarry Smith   Mat_SeqBAIJ *b;
17353b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
17363b2fbd54SBarry Smith 
17373b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
17383b2fbd54SBarry Smith   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1739b6490206SBarry Smith 
1740f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1741f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17422593348eSBarry Smith 
17432593348eSBarry Smith   *A = 0;
1744b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17452593348eSBarry Smith   PLogObjectCreate(B);
1746b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
174744cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17482593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17491a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17501a6a6d98SBarry Smith   if (!flg) {
17517fc0212eSBarry Smith     switch (bs) {
17527fc0212eSBarry Smith       case 1:
17537fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17541a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
175539b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1756f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17577fc0212eSBarry Smith        break;
17584eeb42bcSBarry Smith       case 2:
17594eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17601a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
176139b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1762f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17636d84be18SBarry Smith         break;
1764f327dd97SBarry Smith       case 3:
1765f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17661a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
176739b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1768f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17694eeb42bcSBarry Smith         break;
17706d84be18SBarry Smith       case 4:
17716d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17721a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
177339b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1774f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17756d84be18SBarry Smith         break;
17766d84be18SBarry Smith       case 5:
17776d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17781a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
177939b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1780f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17816d84be18SBarry Smith         break;
17827fc0212eSBarry Smith     }
17831a6a6d98SBarry Smith   }
1784b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1785b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17862593348eSBarry Smith   B->factor           = 0;
17872593348eSBarry Smith   B->lupivotthreshold = 1.0;
1788*90f02eecSBarry Smith   B->mapping          = 0;
17892593348eSBarry Smith   b->row              = 0;
17902593348eSBarry Smith   b->col              = 0;
17912593348eSBarry Smith   b->reallocs         = 0;
17922593348eSBarry Smith 
179344cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
179444cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1795b6490206SBarry Smith   b->mbs     = mbs;
1796f2501298SSatish Balay   b->nbs     = nbs;
1797b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
17982593348eSBarry Smith   if (nnz == PETSC_NULL) {
1799119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
18002593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1801b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1802b6490206SBarry Smith     nz = nz*mbs;
18032593348eSBarry Smith   }
18042593348eSBarry Smith   else {
18052593348eSBarry Smith     nz = 0;
1806b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
18072593348eSBarry Smith   }
18082593348eSBarry Smith 
18092593348eSBarry Smith   /* allocate the matrix space */
18107e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
18112593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
18127e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
18137e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
18142593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
18152593348eSBarry Smith   b->i  = b->j + nz;
18162593348eSBarry Smith   b->singlemalloc = 1;
18172593348eSBarry Smith 
1818de6a44a3SBarry Smith   b->i[0] = 0;
1819b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
18202593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
18212593348eSBarry Smith   }
18222593348eSBarry Smith 
1823b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1824b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1825b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1826b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18272593348eSBarry Smith 
1828b6490206SBarry Smith   b->bs               = bs;
18299df24120SSatish Balay   b->bs2              = bs2;
1830b6490206SBarry Smith   b->mbs              = mbs;
18312593348eSBarry Smith   b->nz               = 0;
183218e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18332593348eSBarry Smith   b->sorted           = 0;
18342593348eSBarry Smith   b->roworiented      = 1;
18352593348eSBarry Smith   b->nonew            = 0;
18362593348eSBarry Smith   b->diag             = 0;
18372593348eSBarry Smith   b->solve_work       = 0;
1838de6a44a3SBarry Smith   b->mult_work        = 0;
18392593348eSBarry Smith   b->spptr            = 0;
18404e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
18414e220ebcSLois Curfman McInnes 
18422593348eSBarry Smith   *A = B;
18432593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18442593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18452593348eSBarry Smith   return 0;
18462593348eSBarry Smith }
18472593348eSBarry Smith 
1848b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18492593348eSBarry Smith {
18502593348eSBarry Smith   Mat         C;
1851b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18529df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1853de6a44a3SBarry Smith 
1854de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18552593348eSBarry Smith 
18562593348eSBarry Smith   *B = 0;
1857b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18582593348eSBarry Smith   PLogObjectCreate(C);
1859b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18602593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1861b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1862b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18632593348eSBarry Smith   C->factor     = A->factor;
18642593348eSBarry Smith   c->row        = 0;
18652593348eSBarry Smith   c->col        = 0;
18662593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18672593348eSBarry Smith 
186844cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
186944cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
187044cd7ae7SLois Curfman McInnes   C->M          = a->m;
187144cd7ae7SLois Curfman McInnes   C->N          = a->n;
187244cd7ae7SLois Curfman McInnes 
1873b6490206SBarry Smith   c->bs         = a->bs;
18749df24120SSatish Balay   c->bs2        = a->bs2;
1875b6490206SBarry Smith   c->mbs        = a->mbs;
1876b6490206SBarry Smith   c->nbs        = a->nbs;
18772593348eSBarry Smith 
1878b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1879b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1880b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18812593348eSBarry Smith     c->imax[i] = a->imax[i];
18822593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18832593348eSBarry Smith   }
18842593348eSBarry Smith 
18852593348eSBarry Smith   /* allocate the matrix space */
18862593348eSBarry Smith   c->singlemalloc = 1;
18877e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18882593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18897e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1890de6a44a3SBarry Smith   c->i  = c->j + nz;
1891b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1892b6490206SBarry Smith   if (mbs > 0) {
1893de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18942593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18957e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18962593348eSBarry Smith     }
18972593348eSBarry Smith   }
18982593348eSBarry Smith 
1899b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
19002593348eSBarry Smith   c->sorted      = a->sorted;
19012593348eSBarry Smith   c->roworiented = a->roworiented;
19022593348eSBarry Smith   c->nonew       = a->nonew;
19032593348eSBarry Smith 
19042593348eSBarry Smith   if (a->diag) {
1905b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1906b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1907b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
19082593348eSBarry Smith       c->diag[i] = a->diag[i];
19092593348eSBarry Smith     }
19102593348eSBarry Smith   }
19112593348eSBarry Smith   else c->diag          = 0;
19122593348eSBarry Smith   c->nz                 = a->nz;
19132593348eSBarry Smith   c->maxnz              = a->maxnz;
19142593348eSBarry Smith   c->solve_work         = 0;
19152593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
19167fc0212eSBarry Smith   c->mult_work          = 0;
19172593348eSBarry Smith   *B = C;
19182593348eSBarry Smith   return 0;
19192593348eSBarry Smith }
19202593348eSBarry Smith 
192119bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
19222593348eSBarry Smith {
1923b6490206SBarry Smith   Mat_SeqBAIJ  *a;
19242593348eSBarry Smith   Mat          B;
1925de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1926b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
192735aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1928a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1929b6490206SBarry Smith   Scalar       *aa;
193019bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19312593348eSBarry Smith 
19321a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1933de6a44a3SBarry Smith   bs2  = bs*bs;
1934b6490206SBarry Smith 
19352593348eSBarry Smith   MPI_Comm_size(comm,&size);
1936b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
193790ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
193877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1939de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19402593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19412593348eSBarry Smith 
1942b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
194335aab85fSBarry Smith 
194435aab85fSBarry Smith   /*
194535aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
194635aab85fSBarry Smith     divisible by the blocksize
194735aab85fSBarry Smith   */
1948b6490206SBarry Smith   mbs        = M/bs;
194935aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
195035aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
195135aab85fSBarry Smith   else                  mbs++;
195235aab85fSBarry Smith   if (extra_rows) {
1953537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
195435aab85fSBarry Smith   }
1955b6490206SBarry Smith 
19562593348eSBarry Smith   /* read in row lengths */
195735aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
195877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
195935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19602593348eSBarry Smith 
1961b6490206SBarry Smith   /* read in column indices */
196235aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
196377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
196435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1965b6490206SBarry Smith 
1966b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1967b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1968b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
196935aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
197035aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
197135aab85fSBarry Smith   masked = mask + mbs;
1972b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1973b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
197435aab85fSBarry Smith     nmask = 0;
1975b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1976b6490206SBarry Smith       kmax = rowlengths[rowcount];
1977b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
197835aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
197935aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1980b6490206SBarry Smith       }
1981b6490206SBarry Smith       rowcount++;
1982b6490206SBarry Smith     }
198335aab85fSBarry Smith     browlengths[i] += nmask;
198435aab85fSBarry Smith     /* zero out the mask elements we set */
198535aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1986b6490206SBarry Smith   }
1987b6490206SBarry Smith 
19882593348eSBarry Smith   /* create our matrix */
198935aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
199035aab85fSBarry Smith          CHKERRQ(ierr);
19912593348eSBarry Smith   B = *A;
1992b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19932593348eSBarry Smith 
19942593348eSBarry Smith   /* set matrix "i" values */
1995de6a44a3SBarry Smith   a->i[0] = 0;
1996b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1997b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1998b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
19992593348eSBarry Smith   }
2000b6490206SBarry Smith   a->nz         = 0;
2001b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
20022593348eSBarry Smith 
2003b6490206SBarry Smith   /* read in nonzero values */
200435aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
200577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
200635aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2007b6490206SBarry Smith 
2008b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2009b6490206SBarry Smith   nzcount = 0; jcount = 0;
2010b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2011b6490206SBarry Smith     nzcountb = nzcount;
201235aab85fSBarry Smith     nmask    = 0;
2013b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2014b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2015b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
201635aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
201735aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2018b6490206SBarry Smith       }
2019b6490206SBarry Smith       rowcount++;
2020b6490206SBarry Smith     }
2021de6a44a3SBarry Smith     /* sort the masked values */
202277c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2023de6a44a3SBarry Smith 
2024b6490206SBarry Smith     /* set "j" values into matrix */
2025b6490206SBarry Smith     maskcount = 1;
202635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
202735aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2028de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2029b6490206SBarry Smith     }
2030b6490206SBarry Smith     /* set "a" values into matrix */
2031de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2032b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2033b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2034b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2035de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2036de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2037de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2038de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2039b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2040b6490206SBarry Smith       }
2041b6490206SBarry Smith     }
204235aab85fSBarry Smith     /* zero out the mask elements we set */
204335aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2044b6490206SBarry Smith   }
204535aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2046b6490206SBarry Smith 
2047b6490206SBarry Smith   PetscFree(rowlengths);
2048b6490206SBarry Smith   PetscFree(browlengths);
2049b6490206SBarry Smith   PetscFree(aa);
2050b6490206SBarry Smith   PetscFree(jj);
2051b6490206SBarry Smith   PetscFree(mask);
2052b6490206SBarry Smith 
2053b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2054de6a44a3SBarry Smith 
2055de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2056de6a44a3SBarry Smith   if (flg) {
205719bcc07fSBarry Smith     Viewer tviewer;
205819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2059639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
206019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2062de6a44a3SBarry Smith   }
2063de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2064de6a44a3SBarry Smith   if (flg) {
206519bcc07fSBarry Smith     Viewer tviewer;
206619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2067639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
206819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2070de6a44a3SBarry Smith   }
2071de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2072de6a44a3SBarry Smith   if (flg) {
207319bcc07fSBarry Smith     Viewer tviewer;
207419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
207519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
207619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2077de6a44a3SBarry Smith   }
2078de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2079de6a44a3SBarry Smith   if (flg) {
208019bcc07fSBarry Smith     Viewer tviewer;
208119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2082639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
208319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
208419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2085de6a44a3SBarry Smith   }
2086de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2087de6a44a3SBarry Smith   if (flg) {
208819bcc07fSBarry Smith     Viewer tviewer;
208919bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
209019bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
209119bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
209219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2093de6a44a3SBarry Smith   }
20942593348eSBarry Smith   return 0;
20952593348eSBarry Smith }
20962593348eSBarry Smith 
20972593348eSBarry Smith 
20982593348eSBarry Smith 
2099