xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 639f9d9dbbc54d6ac4e42e98283c540b41bb2cee)
12593348eSBarry Smith #ifndef lint
2*639f9d9dSBarry Smith static char vcid[] = "$Id: baij.c,v 1.68 1996/09/14 03:08:32 bsmith Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
142593348eSBarry Smith 
15de6a44a3SBarry Smith /*
16de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
17de6a44a3SBarry Smith */
18de6a44a3SBarry Smith 
19de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
20de6a44a3SBarry Smith {
21de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
227fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
23de6a44a3SBarry Smith 
24de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
267fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
27de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28de6a44a3SBarry Smith       if (a->j[j] == i) {
29de6a44a3SBarry Smith         diag[i] = j;
30de6a44a3SBarry Smith         break;
31de6a44a3SBarry Smith       }
32de6a44a3SBarry Smith     }
33de6a44a3SBarry Smith   }
34de6a44a3SBarry Smith   a->diag = diag;
35de6a44a3SBarry Smith   return 0;
36de6a44a3SBarry Smith }
372593348eSBarry Smith 
382593348eSBarry Smith #include "draw.h"
392593348eSBarry Smith #include "pinclude/pviewer.h"
4077c4ece6SBarry Smith #include "sys.h"
412593348eSBarry Smith 
423b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
433b2fbd54SBarry Smith 
443b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
453b2fbd54SBarry Smith                             PetscTruth *done)
463b2fbd54SBarry Smith {
473b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
483b2fbd54SBarry Smith   int         ierr, n = a->mbs,i;
493b2fbd54SBarry Smith 
503b2fbd54SBarry Smith   *nn = n;
513b2fbd54SBarry Smith   if (!ia) return 0;
523b2fbd54SBarry Smith   if (symmetric) {
533b2fbd54SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
543b2fbd54SBarry Smith   } else if (oshift == 1) {
553b2fbd54SBarry Smith     /* temporarily add 1 to i and j indices */
563b2fbd54SBarry Smith     int nz = a->i[n] + 1;
573b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]++;
583b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]++;
593b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
603b2fbd54SBarry Smith   } else {
613b2fbd54SBarry Smith     *ia = a->i; *ja = a->j;
623b2fbd54SBarry Smith   }
633b2fbd54SBarry Smith 
643b2fbd54SBarry Smith   return 0;
653b2fbd54SBarry Smith }
663b2fbd54SBarry Smith 
673b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
683b2fbd54SBarry Smith                                 PetscTruth *done)
693b2fbd54SBarry Smith {
703b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
713b2fbd54SBarry Smith   int         i,n = a->mbs;
723b2fbd54SBarry Smith 
733b2fbd54SBarry Smith   if (!ia) return 0;
743b2fbd54SBarry Smith   if (symmetric) {
753b2fbd54SBarry Smith     PetscFree(*ia);
763b2fbd54SBarry Smith     PetscFree(*ja);
773b2fbd54SBarry Smith   }
783b2fbd54SBarry Smith     if (oshift == 1) {
793b2fbd54SBarry Smith     int nz = a->i[n];
803b2fbd54SBarry Smith     for ( i=0; i<nz; i++ ) a->j[i]--;
813b2fbd54SBarry Smith     for ( i=0; i<n+1; i++ ) a->i[i]--;
823b2fbd54SBarry Smith   }
833b2fbd54SBarry Smith   return 0;
843b2fbd54SBarry Smith }
853b2fbd54SBarry Smith 
863b2fbd54SBarry Smith 
87b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
882593348eSBarry Smith {
89b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
909df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
91b6490206SBarry Smith   Scalar      *aa;
922593348eSBarry Smith 
9390ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
942593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
952593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
963b2fbd54SBarry Smith 
972593348eSBarry Smith   col_lens[1] = a->m;
982593348eSBarry Smith   col_lens[2] = a->n;
997e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1002593348eSBarry Smith 
1012593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
102b6490206SBarry Smith   count = 0;
103b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
104b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
105b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
106b6490206SBarry Smith     }
1072593348eSBarry Smith   }
10877c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1092593348eSBarry Smith   PetscFree(col_lens);
1102593348eSBarry Smith 
1112593348eSBarry Smith   /* store column indices (zero start index) */
1127e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
113b6490206SBarry Smith   count = 0;
114b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
115b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
116b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
117b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
118b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1192593348eSBarry Smith         }
1202593348eSBarry Smith       }
121b6490206SBarry Smith     }
122b6490206SBarry Smith   }
1237e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
124b6490206SBarry Smith   PetscFree(jj);
1252593348eSBarry Smith 
1262593348eSBarry Smith   /* store nonzero values */
1277e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
128b6490206SBarry Smith   count = 0;
129b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
130b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
131b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
132b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1337e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
134b6490206SBarry Smith         }
135b6490206SBarry Smith       }
136b6490206SBarry Smith     }
137b6490206SBarry Smith   }
1387e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
139b6490206SBarry Smith   PetscFree(aa);
1402593348eSBarry Smith   return 0;
1412593348eSBarry Smith }
1422593348eSBarry Smith 
143b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1442593348eSBarry Smith {
145b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1469df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1472593348eSBarry Smith   FILE        *fd;
1482593348eSBarry Smith   char        *outputname;
1492593348eSBarry Smith 
15090ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1512593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
15290ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
153*639f9d9dSBarry Smith   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1547ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1552593348eSBarry Smith   }
156*639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
157b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1582593348eSBarry Smith   }
159*639f9d9dSBarry Smith   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
16044cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
16144cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
16244cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
16344cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
16444cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
16544cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
16644cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
16744cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
16844cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
16944cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
17044cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
17144cd7ae7SLois Curfman McInnes #else
17244cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
17344cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
17444cd7ae7SLois Curfman McInnes #endif
17544cd7ae7SLois Curfman McInnes           }
17644cd7ae7SLois Curfman McInnes         }
17744cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
17844cd7ae7SLois Curfman McInnes       }
17944cd7ae7SLois Curfman McInnes     }
18044cd7ae7SLois Curfman McInnes   }
1812593348eSBarry Smith   else {
182b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
183b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
184b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
185b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
186b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
18788685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1887e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
18988685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1907e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
19188685aaeSLois Curfman McInnes           }
19288685aaeSLois Curfman McInnes           else {
1937e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
19488685aaeSLois Curfman McInnes           }
19588685aaeSLois Curfman McInnes #else
1967e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
19788685aaeSLois Curfman McInnes #endif
1982593348eSBarry Smith           }
1992593348eSBarry Smith         }
2002593348eSBarry Smith         fprintf(fd,"\n");
2012593348eSBarry Smith       }
2022593348eSBarry Smith     }
203b6490206SBarry Smith   }
2042593348eSBarry Smith   fflush(fd);
2052593348eSBarry Smith   return 0;
2062593348eSBarry Smith }
2072593348eSBarry Smith 
2083270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2093270192aSSatish Balay {
2103270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2113270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2123270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2133270192aSSatish Balay   Scalar       *aa;
2143270192aSSatish Balay   Draw         draw;
2153270192aSSatish Balay   DrawButton   button;
2163270192aSSatish Balay   PetscTruth   isnull;
2173270192aSSatish Balay 
2183270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2193270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2203270192aSSatish Balay 
2213270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2223270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2233270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2243270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2253270192aSSatish Balay   color = DRAW_BLUE;
2263270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2273270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2283270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2293270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2303270192aSSatish Balay       aa = a->a + j*bs2;
2313270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2323270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2333270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2343270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2353270192aSSatish Balay         }
2363270192aSSatish Balay       }
2373270192aSSatish Balay     }
2383270192aSSatish Balay   }
2393270192aSSatish Balay   color = DRAW_CYAN;
2403270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2413270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2423270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2433270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2443270192aSSatish Balay       aa = a->a + j*bs2;
2453270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2463270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2473270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2483270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2493270192aSSatish Balay         }
2503270192aSSatish Balay       }
2513270192aSSatish Balay     }
2523270192aSSatish Balay   }
2533270192aSSatish Balay 
2543270192aSSatish Balay   color = DRAW_RED;
2553270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2563270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2573270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2583270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2593270192aSSatish Balay       aa = a->a + j*bs2;
2603270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2613270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2623270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2633270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2643270192aSSatish Balay         }
2653270192aSSatish Balay       }
2663270192aSSatish Balay     }
2673270192aSSatish Balay   }
2683270192aSSatish Balay 
2693270192aSSatish Balay   DrawFlush(draw);
2703270192aSSatish Balay   DrawGetPause(draw,&pause);
2713270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2723270192aSSatish Balay 
2733270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2743270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2753270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2763270192aSSatish Balay     DrawClear(draw);
2773270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2783270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2793270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2803270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2813270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2823270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2833270192aSSatish Balay     w *= scale; h *= scale;
2843270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2853270192aSSatish Balay     color = DRAW_BLUE;
2863270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2873270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2883270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2893270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
2903270192aSSatish Balay         aa = a->a + j*bs2;
2913270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
2923270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
2933270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
2943270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2953270192aSSatish Balay           }
2963270192aSSatish Balay         }
2973270192aSSatish Balay       }
2983270192aSSatish Balay     }
2993270192aSSatish Balay     color = DRAW_CYAN;
3003270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3013270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3023270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3033270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3043270192aSSatish Balay         aa = a->a + j*bs2;
3053270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3063270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3073270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3083270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3093270192aSSatish Balay           }
3103270192aSSatish Balay         }
3113270192aSSatish Balay       }
3123270192aSSatish Balay     }
3133270192aSSatish Balay 
3143270192aSSatish Balay     color = DRAW_RED;
3153270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3163270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3173270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3183270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3193270192aSSatish Balay         aa = a->a + j*bs2;
3203270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3213270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3223270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3233270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3243270192aSSatish Balay           }
3253270192aSSatish Balay         }
3263270192aSSatish Balay       }
3273270192aSSatish Balay     }
3283270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3293270192aSSatish Balay   }
3303270192aSSatish Balay   return 0;
3313270192aSSatish Balay }
3323270192aSSatish Balay 
333b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3342593348eSBarry Smith {
3352593348eSBarry Smith   Mat         A = (Mat) obj;
33619bcc07fSBarry Smith   ViewerType  vtype;
33719bcc07fSBarry Smith   int         ierr;
3382593348eSBarry Smith 
33919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
34019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
341b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3422593348eSBarry Smith   }
34319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
344b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3452593348eSBarry Smith   }
34619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
347b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3482593348eSBarry Smith   }
34919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3503270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3512593348eSBarry Smith   }
3522593348eSBarry Smith   return 0;
3532593348eSBarry Smith }
354b6490206SBarry Smith 
355119a934aSSatish Balay #define CHUNKSIZE  10
356cd0e1443SSatish Balay 
357cd0e1443SSatish Balay /* This version has row oriented v  */
358*639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
359cd0e1443SSatish Balay {
360cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
362cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
363a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3649df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
365cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
366cd0e1443SSatish Balay 
367cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
368cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
3693b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
370cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
371cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
3723b2fbd54SBarry Smith #endif
373a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
374cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
375cd0e1443SSatish Balay     low = 0;
376cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
3773b2fbd54SBarry Smith #if defined(PETSC_BOPT_g)
378cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
379cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
3803b2fbd54SBarry Smith #endif
381a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
382cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
383cd0e1443SSatish Balay       if (roworiented) {
384cd0e1443SSatish Balay         value = *v++;
385cd0e1443SSatish Balay       }
386cd0e1443SSatish Balay       else {
387cd0e1443SSatish Balay         value = v[k + l*m];
388cd0e1443SSatish Balay       }
389cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
390cd0e1443SSatish Balay       while (high-low > 5) {
391cd0e1443SSatish Balay         t = (low+high)/2;
392cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
393cd0e1443SSatish Balay         else              low  = t;
394cd0e1443SSatish Balay       }
395cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
396cd0e1443SSatish Balay         if (rp[i] > bcol) break;
397cd0e1443SSatish Balay         if (rp[i] == bcol) {
3987e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
399cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
400cd0e1443SSatish Balay           else                  *bap  = value;
401cd0e1443SSatish Balay           goto noinsert;
402cd0e1443SSatish Balay         }
403cd0e1443SSatish Balay       }
404cd0e1443SSatish Balay       if (nonew) goto noinsert;
405cd0e1443SSatish Balay       if (nrow >= rmax) {
406cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
407cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
408cd0e1443SSatish Balay         Scalar *new_a;
409cd0e1443SSatish Balay 
410cd0e1443SSatish Balay         /* malloc new storage space */
4117e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
412cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4137e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
414cd0e1443SSatish Balay         new_i   = new_j + new_nz;
415cd0e1443SSatish Balay 
416cd0e1443SSatish Balay         /* copy over old data into new slots */
417cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4187e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
419a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
420a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
421a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
422cd0e1443SSatish Balay                                                            len*sizeof(int));
423a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
424a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
425a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
426a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
427cd0e1443SSatish Balay         /* free up old matrix storage */
428cd0e1443SSatish Balay         PetscFree(a->a);
429cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
430cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
431cd0e1443SSatish Balay         a->singlemalloc = 1;
432cd0e1443SSatish Balay 
433a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
434cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4357e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
43618e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
437cd0e1443SSatish Balay         a->reallocs++;
438119a934aSSatish Balay         a->nz++;
439cd0e1443SSatish Balay       }
4407e67e3f9SSatish Balay       N = nrow++ - 1;
441cd0e1443SSatish Balay       /* shift up all the later entries in this row */
442cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
443cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4447e67e3f9SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
445cd0e1443SSatish Balay       }
4467e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
447cd0e1443SSatish Balay       rp[i]                      = bcol;
4487e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
449cd0e1443SSatish Balay       noinsert:;
450cd0e1443SSatish Balay       low = i;
451cd0e1443SSatish Balay     }
452cd0e1443SSatish Balay     ailen[brow] = nrow;
453cd0e1443SSatish Balay   }
454cd0e1443SSatish Balay   return 0;
455cd0e1443SSatish Balay }
456cd0e1443SSatish Balay 
4570b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4580b824a48SSatish Balay {
4590b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4600b824a48SSatish Balay   *m = a->m; *n = a->n;
4610b824a48SSatish Balay   return 0;
4620b824a48SSatish Balay }
4630b824a48SSatish Balay 
4640b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4650b824a48SSatish Balay {
4660b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4670b824a48SSatish Balay   *m = 0; *n = a->m;
4680b824a48SSatish Balay   return 0;
4690b824a48SSatish Balay }
4700b824a48SSatish Balay 
4719867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4729867e207SSatish Balay {
4739867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4747e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4759867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4769867e207SSatish Balay 
4779867e207SSatish Balay   bs  = a->bs;
4789867e207SSatish Balay   ai  = a->i;
4799867e207SSatish Balay   aj  = a->j;
4809867e207SSatish Balay   aa  = a->a;
4819df24120SSatish Balay   bs2 = a->bs2;
4829867e207SSatish Balay 
4839867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4849867e207SSatish Balay 
4859867e207SSatish Balay   bn  = row/bs;   /* Block number */
4869867e207SSatish Balay   bp  = row % bs; /* Block Position */
4879867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4889867e207SSatish Balay   *nz = bs*M;
4899867e207SSatish Balay 
4909867e207SSatish Balay   if (v) {
4919867e207SSatish Balay     *v = 0;
4929867e207SSatish Balay     if (*nz) {
4939867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
4949867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
4959867e207SSatish Balay         v_i  = *v + i*bs;
4967e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
4977e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
4989867e207SSatish Balay       }
4999867e207SSatish Balay     }
5009867e207SSatish Balay   }
5019867e207SSatish Balay 
5029867e207SSatish Balay   if (idx) {
5039867e207SSatish Balay     *idx = 0;
5049867e207SSatish Balay     if (*nz) {
5059867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5069867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5079867e207SSatish Balay         idx_i = *idx + i*bs;
5089867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5099867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5109867e207SSatish Balay       }
5119867e207SSatish Balay     }
5129867e207SSatish Balay   }
5139867e207SSatish Balay   return 0;
5149867e207SSatish Balay }
5159867e207SSatish Balay 
5169867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5179867e207SSatish Balay {
5189867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5199867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5209867e207SSatish Balay   return 0;
5219867e207SSatish Balay }
522b6490206SBarry Smith 
523f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
524f2501298SSatish Balay {
525f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
526f2501298SSatish Balay   Mat         C;
527f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5289df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
529f2501298SSatish Balay   Scalar      *array=a->a;
530f2501298SSatish Balay 
531f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
532f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
533f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
534f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
535a7c10996SSatish Balay 
536a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
537f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
538f2501298SSatish Balay   PetscFree(col);
539f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
540f2501298SSatish Balay   cols = rows + bs;
541f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
542f2501298SSatish Balay     cols[0] = i*bs;
543f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
544f2501298SSatish Balay     len = ai[i+1] - ai[i];
545f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
546f2501298SSatish Balay       rows[0] = (*aj++)*bs;
547f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
548f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
549f2501298SSatish Balay       array += bs2;
550f2501298SSatish Balay     }
551f2501298SSatish Balay   }
5521073c447SSatish Balay   PetscFree(rows);
553f2501298SSatish Balay 
5546d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5556d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
556f2501298SSatish Balay 
557f2501298SSatish Balay   if (B != PETSC_NULL) {
558f2501298SSatish Balay     *B = C;
559f2501298SSatish Balay   } else {
560f2501298SSatish Balay     /* This isn't really an in-place transpose */
561f2501298SSatish Balay     PetscFree(a->a);
562f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
563f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
564f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
565f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
566f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
567f2501298SSatish Balay     PetscFree(a);
568f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
569f2501298SSatish Balay     PetscHeaderDestroy(C);
570f2501298SSatish Balay   }
571f2501298SSatish Balay   return 0;
572f2501298SSatish Balay }
573f2501298SSatish Balay 
574f2501298SSatish Balay 
575584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
576584200bdSSatish Balay {
577584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
578584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
579a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5809df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
581584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
582584200bdSSatish Balay 
5836d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
584584200bdSSatish Balay 
585584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
586584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
587584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
588584200bdSSatish Balay     if (fshift) {
589a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
590584200bdSSatish Balay       N = ailen[i];
591584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
592584200bdSSatish Balay         ip[j-fshift] = ip[j];
5937e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
594584200bdSSatish Balay       }
595584200bdSSatish Balay     }
596584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
597584200bdSSatish Balay   }
598584200bdSSatish Balay   if (mbs) {
599584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
600584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
601584200bdSSatish Balay   }
602584200bdSSatish Balay   /* reset ilen and imax for each row */
603584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
604584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
605584200bdSSatish Balay   }
606a7c10996SSatish Balay   a->nz = ai[mbs];
607584200bdSSatish Balay 
608584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
609584200bdSSatish Balay   if (fshift && a->diag) {
610584200bdSSatish Balay     PetscFree(a->diag);
611584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
612584200bdSSatish Balay     a->diag = 0;
613584200bdSSatish Balay   }
614537820f0SBarry Smith   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
615537820f0SBarry Smith            fshift*bs2,a->nz*bs2,m,a->bs);
616584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
617584200bdSSatish Balay            a->reallocs);
6184e220ebcSLois Curfman McInnes   A->info.nz_unneeded  = (double)fshift*bs2;
6194e220ebcSLois Curfman McInnes 
620584200bdSSatish Balay   return 0;
621584200bdSSatish Balay }
622584200bdSSatish Balay 
623b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6242593348eSBarry Smith {
625b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6269df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6272593348eSBarry Smith   return 0;
6282593348eSBarry Smith }
6292593348eSBarry Smith 
630b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6312593348eSBarry Smith {
6322593348eSBarry Smith   Mat         A  = (Mat) obj;
633b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6342593348eSBarry Smith 
6352593348eSBarry Smith #if defined(PETSC_LOG)
6362593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6372593348eSBarry Smith #endif
6382593348eSBarry Smith   PetscFree(a->a);
6392593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6402593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6412593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6422593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6432593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
644de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6452593348eSBarry Smith   PetscFree(a);
646f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
647f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6482593348eSBarry Smith   return 0;
6492593348eSBarry Smith }
6502593348eSBarry Smith 
651b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6522593348eSBarry Smith {
653b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6546d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6556d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6566d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
6576d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6586d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6596d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
6606d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6616d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
6626d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
66394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6646d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6656d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6662593348eSBarry Smith   else
667b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6682593348eSBarry Smith   return 0;
6692593348eSBarry Smith }
6702593348eSBarry Smith 
6712593348eSBarry Smith 
6722593348eSBarry Smith /* -------------------------------------------------------*/
6732593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6742593348eSBarry Smith /* -------------------------------------------------------*/
675b6490206SBarry Smith #include "pinclude/plapack.h"
676b6490206SBarry Smith 
67739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6782593348eSBarry Smith {
679b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
68039b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
681c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
6822593348eSBarry Smith 
683c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
684c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
685b6490206SBarry Smith 
686119a934aSSatish Balay   idx   = a->j;
687119a934aSSatish Balay   v     = a->a;
688119a934aSSatish Balay   ii    = a->i;
689119a934aSSatish Balay 
690119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
691119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
692119a934aSSatish Balay     sum  = 0.0;
693119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
6941a6a6d98SBarry Smith     z[i] = sum;
695119a934aSSatish Balay   }
696c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
697c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
69839b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
69939b95ed1SBarry Smith   return 0;
70039b95ed1SBarry Smith }
70139b95ed1SBarry Smith 
70239b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
70339b95ed1SBarry Smith {
70439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
70539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
70639b95ed1SBarry Smith   register Scalar x1,x2;
70739b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
708c16cb8f2SBarry Smith   int             j,n;
70939b95ed1SBarry Smith 
710c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
711c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
71239b95ed1SBarry Smith 
71339b95ed1SBarry Smith   idx   = a->j;
71439b95ed1SBarry Smith   v     = a->a;
71539b95ed1SBarry Smith   ii    = a->i;
71639b95ed1SBarry Smith 
717119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
718119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
719119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
720119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
721119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
722119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
723119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
724119a934aSSatish Balay       v += 4;
725119a934aSSatish Balay     }
7261a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
727119a934aSSatish Balay     z += 2;
728119a934aSSatish Balay   }
729c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
730c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
73139b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
73239b95ed1SBarry Smith   return 0;
73339b95ed1SBarry Smith }
73439b95ed1SBarry Smith 
73539b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
73639b95ed1SBarry Smith {
73739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
73839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
739c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
74039b95ed1SBarry Smith 
741c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
742c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
74339b95ed1SBarry Smith 
74439b95ed1SBarry Smith   idx   = a->j;
74539b95ed1SBarry Smith   v     = a->a;
74639b95ed1SBarry Smith   ii    = a->i;
74739b95ed1SBarry Smith 
748119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
749119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
750119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
751119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
752119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
753119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
754119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
755119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
756119a934aSSatish Balay       v += 9;
757119a934aSSatish Balay     }
7581a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
759119a934aSSatish Balay     z += 3;
760119a934aSSatish Balay   }
761c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
762c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
76339b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
76439b95ed1SBarry Smith   return 0;
76539b95ed1SBarry Smith }
76639b95ed1SBarry Smith 
76739b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
76839b95ed1SBarry Smith {
76939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77139b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
77239b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
773c16cb8f2SBarry Smith   int             j,n;
77439b95ed1SBarry Smith 
775c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
776c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
77739b95ed1SBarry Smith 
77839b95ed1SBarry Smith   idx   = a->j;
77939b95ed1SBarry Smith   v     = a->a;
78039b95ed1SBarry Smith   ii    = a->i;
78139b95ed1SBarry Smith 
782119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
783119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
784119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
785119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
786119a934aSSatish Balay       xb = x + 4*(*idx++);
787119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
788119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
789119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
790119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
791119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
792119a934aSSatish Balay       v += 16;
793119a934aSSatish Balay     }
7941a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
795119a934aSSatish Balay     z += 4;
796119a934aSSatish Balay   }
797c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
798c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
79939b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80039b95ed1SBarry Smith   return 0;
80139b95ed1SBarry Smith }
80239b95ed1SBarry Smith 
80339b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
80439b95ed1SBarry Smith {
80539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
80639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
80739b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
808c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
80939b95ed1SBarry Smith 
810c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
811c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
81239b95ed1SBarry Smith 
81339b95ed1SBarry Smith   idx   = a->j;
81439b95ed1SBarry Smith   v     = a->a;
81539b95ed1SBarry Smith   ii    = a->i;
81639b95ed1SBarry Smith 
817119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
818119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
819119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
820119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
821119a934aSSatish Balay       xb = x + 5*(*idx++);
822119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
823119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
824119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
825119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
826119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
827119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
828119a934aSSatish Balay       v += 25;
829119a934aSSatish Balay     }
8301a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
831119a934aSSatish Balay     z += 5;
832119a934aSSatish Balay   }
833c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
834c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
83539b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
83639b95ed1SBarry Smith   return 0;
83739b95ed1SBarry Smith }
83839b95ed1SBarry Smith 
83939b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84039b95ed1SBarry Smith {
84139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
84239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
843c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
844c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
845c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
84639b95ed1SBarry Smith 
847c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
848c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
84939b95ed1SBarry Smith 
85039b95ed1SBarry Smith   idx   = a->j;
85139b95ed1SBarry Smith   v     = a->a;
85239b95ed1SBarry Smith   ii    = a->i;
85339b95ed1SBarry Smith 
85439b95ed1SBarry Smith 
855119a934aSSatish Balay   if (!a->mult_work) {
8563b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8572b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
858119a934aSSatish Balay   }
859119a934aSSatish Balay   work = a->mult_work;
860119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
861119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
862119a934aSSatish Balay     ncols = n*bs;
863119a934aSSatish Balay     workt = work;
864119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
865119a934aSSatish Balay       xb = x + bs*(*idx++);
866119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
867119a934aSSatish Balay       workt += bs;
868119a934aSSatish Balay     }
8691a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
870119a934aSSatish Balay     v += n*bs2;
871119a934aSSatish Balay     z += bs;
872119a934aSSatish Balay   }
873c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
874c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
8751a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
876bb42667fSSatish Balay   return 0;
877bb42667fSSatish Balay }
878bb42667fSSatish Balay 
879f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
880f44d3a6dSSatish Balay {
881f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
882f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
883c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
884f44d3a6dSSatish Balay 
885c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
886c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
887c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
888f44d3a6dSSatish Balay 
889f44d3a6dSSatish Balay   idx   = a->j;
890f44d3a6dSSatish Balay   v     = a->a;
891f44d3a6dSSatish Balay   ii    = a->i;
892f44d3a6dSSatish Balay 
893f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
894f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
895f44d3a6dSSatish Balay     sum  = y[i];
896f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
897f44d3a6dSSatish Balay     z[i] = sum;
898f44d3a6dSSatish Balay   }
899c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
900c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
901c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
902f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
903f44d3a6dSSatish Balay   return 0;
904f44d3a6dSSatish Balay }
905f44d3a6dSSatish Balay 
906f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
907f44d3a6dSSatish Balay {
908f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
909f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
910f44d3a6dSSatish Balay   register Scalar x1,x2;
911f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
912c16cb8f2SBarry Smith   int             j,n;
913f44d3a6dSSatish Balay 
914c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
915c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
916c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
917f44d3a6dSSatish Balay 
918f44d3a6dSSatish Balay   idx   = a->j;
919f44d3a6dSSatish Balay   v     = a->a;
920f44d3a6dSSatish Balay   ii    = a->i;
921f44d3a6dSSatish Balay 
922f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
923f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
924f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
925f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
926f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
927f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
928f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
929f44d3a6dSSatish Balay       v += 4;
930f44d3a6dSSatish Balay     }
931f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
932f44d3a6dSSatish Balay     z += 2; y += 2;
933f44d3a6dSSatish Balay   }
934c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
935c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
936c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
937f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
938f44d3a6dSSatish Balay   return 0;
939f44d3a6dSSatish Balay }
940f44d3a6dSSatish Balay 
941f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
942f44d3a6dSSatish Balay {
943f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
944f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
945c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
946f44d3a6dSSatish Balay 
947c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
948c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
949c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
950f44d3a6dSSatish Balay 
951f44d3a6dSSatish Balay   idx   = a->j;
952f44d3a6dSSatish Balay   v     = a->a;
953f44d3a6dSSatish Balay   ii    = a->i;
954f44d3a6dSSatish Balay 
955f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
956f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
957f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
958f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
959f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
960f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
961f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
962f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
963f44d3a6dSSatish Balay       v += 9;
964f44d3a6dSSatish Balay     }
965f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
966f44d3a6dSSatish Balay     z += 3; y += 3;
967f44d3a6dSSatish Balay   }
968c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
969c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
970c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
971f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
972f44d3a6dSSatish Balay   return 0;
973f44d3a6dSSatish Balay }
974f44d3a6dSSatish Balay 
975f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
976f44d3a6dSSatish Balay {
977f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
978f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
979f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
980f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
981c16cb8f2SBarry Smith   int             j,n;
982f44d3a6dSSatish Balay 
983c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
984c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
985c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
986f44d3a6dSSatish Balay 
987f44d3a6dSSatish Balay   idx   = a->j;
988f44d3a6dSSatish Balay   v     = a->a;
989f44d3a6dSSatish Balay   ii    = a->i;
990f44d3a6dSSatish Balay 
991f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
992f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
993f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
994f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
995f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
996f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
997f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
998f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
999f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1000f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1001f44d3a6dSSatish Balay       v += 16;
1002f44d3a6dSSatish Balay     }
1003f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1004f44d3a6dSSatish Balay     z += 4; y += 4;
1005f44d3a6dSSatish Balay   }
1006c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1007c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1008c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1009f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1010f44d3a6dSSatish Balay   return 0;
1011f44d3a6dSSatish Balay }
1012f44d3a6dSSatish Balay 
1013f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1014f44d3a6dSSatish Balay {
1015f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1016f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1017f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1018c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1019f44d3a6dSSatish Balay 
1020c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1021c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1022c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1023f44d3a6dSSatish Balay 
1024f44d3a6dSSatish Balay   idx   = a->j;
1025f44d3a6dSSatish Balay   v     = a->a;
1026f44d3a6dSSatish Balay   ii    = a->i;
1027f44d3a6dSSatish Balay 
1028f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1029f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1030f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1031f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1032f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1033f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1034f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1035f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1036f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1037f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1038f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1039f44d3a6dSSatish Balay       v += 25;
1040f44d3a6dSSatish Balay     }
1041f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1042f44d3a6dSSatish Balay     z += 5; y += 5;
1043f44d3a6dSSatish Balay   }
1044c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1045c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1046c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1047f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1048f44d3a6dSSatish Balay   return 0;
1049f44d3a6dSSatish Balay }
1050f44d3a6dSSatish Balay 
1051f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1052f44d3a6dSSatish Balay {
1053f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1054f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1055f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1056f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1057f44d3a6dSSatish Balay 
1058f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1059f44d3a6dSSatish Balay 
1060c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1061c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1062f44d3a6dSSatish Balay 
1063f44d3a6dSSatish Balay   idx   = a->j;
1064f44d3a6dSSatish Balay   v     = a->a;
1065f44d3a6dSSatish Balay   ii    = a->i;
1066f44d3a6dSSatish Balay 
1067f44d3a6dSSatish Balay 
1068f44d3a6dSSatish Balay   if (!a->mult_work) {
1069f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1070f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1071f44d3a6dSSatish Balay   }
1072f44d3a6dSSatish Balay   work = a->mult_work;
1073f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1074f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1075f44d3a6dSSatish Balay     ncols = n*bs;
1076f44d3a6dSSatish Balay     workt = work;
1077f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1078f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1079f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1080f44d3a6dSSatish Balay       workt += bs;
1081f44d3a6dSSatish Balay     }
1082f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1083f44d3a6dSSatish Balay     v += n*bs2;
1084f44d3a6dSSatish Balay     z += bs;
1085f44d3a6dSSatish Balay   }
1086c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1087c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1088f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1089f44d3a6dSSatish Balay   return 0;
1090f44d3a6dSSatish Balay }
1091f44d3a6dSSatish Balay 
10921a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1093bb42667fSSatish Balay {
1094bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
10951a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1096bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1097bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
10989df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1099119a934aSSatish Balay 
1100119a934aSSatish Balay 
1101bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1102bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1103bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1104bb42667fSSatish Balay 
1105119a934aSSatish Balay   idx   = a->j;
1106119a934aSSatish Balay   v     = a->a;
1107119a934aSSatish Balay   ii    = a->i;
1108119a934aSSatish Balay 
1109119a934aSSatish Balay   switch (bs) {
1110119a934aSSatish Balay   case 1:
1111119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1112119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1113119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1114119a934aSSatish Balay       ib = idx + ai[i];
1115119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1116bb1453f0SSatish Balay         rval    = ib[j];
1117bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1118119a934aSSatish Balay       }
1119119a934aSSatish Balay     }
1120119a934aSSatish Balay     break;
1121119a934aSSatish Balay   case 2:
1122119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1123119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1124119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1125119a934aSSatish Balay       ib = idx + ai[i];
1126119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1127119a934aSSatish Balay         rval      = ib[j]*2;
1128bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1129bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1130119a934aSSatish Balay         v += 4;
1131119a934aSSatish Balay       }
1132119a934aSSatish Balay     }
1133119a934aSSatish Balay     break;
1134119a934aSSatish Balay   case 3:
1135119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1136119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1137119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1138119a934aSSatish Balay       ib = idx + ai[i];
1139119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1140119a934aSSatish Balay         rval      = ib[j]*3;
1141bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1142bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1143bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1144119a934aSSatish Balay         v += 9;
1145119a934aSSatish Balay       }
1146119a934aSSatish Balay     }
1147119a934aSSatish Balay     break;
1148119a934aSSatish Balay   case 4:
1149119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1150119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1151119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1152119a934aSSatish Balay       ib = idx + ai[i];
1153119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1154119a934aSSatish Balay         rval      = ib[j]*4;
1155bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1156bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1157bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1158bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1159119a934aSSatish Balay         v += 16;
1160119a934aSSatish Balay       }
1161119a934aSSatish Balay     }
1162119a934aSSatish Balay     break;
1163119a934aSSatish Balay   case 5:
1164119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1165119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1166119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1167119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1168119a934aSSatish Balay       ib = idx + ai[i];
1169119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1170119a934aSSatish Balay         rval      = ib[j]*5;
1171bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1172bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1173bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1174bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1175bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1176119a934aSSatish Balay         v += 25;
1177119a934aSSatish Balay       }
1178119a934aSSatish Balay     }
1179119a934aSSatish Balay     break;
1180119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1181119a934aSSatish Balay     default: {
1182119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1183119a934aSSatish Balay       if (!a->mult_work) {
11843b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1185bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1186119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1187119a934aSSatish Balay       }
1188119a934aSSatish Balay       work = a->mult_work;
1189119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1190119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1191119a934aSSatish Balay         ncols = n*bs;
1192119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1193119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1194119a934aSSatish Balay         v += n*bs2;
1195119a934aSSatish Balay         x += bs;
1196119a934aSSatish Balay         workt = work;
1197119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1198119a934aSSatish Balay           zb = z + bs*(*idx++);
1199bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1200119a934aSSatish Balay           workt += bs;
1201119a934aSSatish Balay         }
1202119a934aSSatish Balay       }
1203119a934aSSatish Balay     }
1204119a934aSSatish Balay   }
12050419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12060419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1207faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1208faf6580fSSatish Balay   return 0;
1209faf6580fSSatish Balay }
1210faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1211faf6580fSSatish Balay {
1212faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1213faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1214faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1215faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1216faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1217faf6580fSSatish Balay 
1218faf6580fSSatish Balay 
1219faf6580fSSatish Balay 
1220faf6580fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1221faf6580fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1222faf6580fSSatish Balay 
1223648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1224648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1225648c1d95SSatish Balay 
1226faf6580fSSatish Balay 
1227faf6580fSSatish Balay   idx   = a->j;
1228faf6580fSSatish Balay   v     = a->a;
1229faf6580fSSatish Balay   ii    = a->i;
1230faf6580fSSatish Balay 
1231faf6580fSSatish Balay   switch (bs) {
1232faf6580fSSatish Balay   case 1:
1233faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1234faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1235faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1236faf6580fSSatish Balay       ib = idx + ai[i];
1237faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1238faf6580fSSatish Balay         rval    = ib[j];
1239faf6580fSSatish Balay         z[rval] += *v++ * x1;
1240faf6580fSSatish Balay       }
1241faf6580fSSatish Balay     }
1242faf6580fSSatish Balay     break;
1243faf6580fSSatish Balay   case 2:
1244faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1245faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1246faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1247faf6580fSSatish Balay       ib = idx + ai[i];
1248faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1249faf6580fSSatish Balay         rval      = ib[j]*2;
1250faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1251faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1252faf6580fSSatish Balay         v += 4;
1253faf6580fSSatish Balay       }
1254faf6580fSSatish Balay     }
1255faf6580fSSatish Balay     break;
1256faf6580fSSatish Balay   case 3:
1257faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1258faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1259faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1260faf6580fSSatish Balay       ib = idx + ai[i];
1261faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1262faf6580fSSatish Balay         rval      = ib[j]*3;
1263faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1264faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1265faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1266faf6580fSSatish Balay         v += 9;
1267faf6580fSSatish Balay       }
1268faf6580fSSatish Balay     }
1269faf6580fSSatish Balay     break;
1270faf6580fSSatish Balay   case 4:
1271faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1272faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1273faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1274faf6580fSSatish Balay       ib = idx + ai[i];
1275faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1276faf6580fSSatish Balay         rval      = ib[j]*4;
1277faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1278faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1279faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1280faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1281faf6580fSSatish Balay         v += 16;
1282faf6580fSSatish Balay       }
1283faf6580fSSatish Balay     }
1284faf6580fSSatish Balay     break;
1285faf6580fSSatish Balay   case 5:
1286faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1287faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1288faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1289faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1290faf6580fSSatish Balay       ib = idx + ai[i];
1291faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1292faf6580fSSatish Balay         rval      = ib[j]*5;
1293faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1294faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1295faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1296faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1297faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1298faf6580fSSatish Balay         v += 25;
1299faf6580fSSatish Balay       }
1300faf6580fSSatish Balay     }
1301faf6580fSSatish Balay     break;
1302faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1303faf6580fSSatish Balay     default: {
1304faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1305faf6580fSSatish Balay       if (!a->mult_work) {
1306faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1307faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1308faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1309faf6580fSSatish Balay       }
1310faf6580fSSatish Balay       work = a->mult_work;
1311faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1312faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1313faf6580fSSatish Balay         ncols = n*bs;
1314faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1315faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1316faf6580fSSatish Balay         v += n*bs2;
1317faf6580fSSatish Balay         x += bs;
1318faf6580fSSatish Balay         workt = work;
1319faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1320faf6580fSSatish Balay           zb = z + bs*(*idx++);
1321faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1322faf6580fSSatish Balay           workt += bs;
1323faf6580fSSatish Balay         }
1324faf6580fSSatish Balay       }
1325faf6580fSSatish Balay     }
1326faf6580fSSatish Balay   }
1327faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1328faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1329faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13302593348eSBarry Smith   return 0;
13312593348eSBarry Smith }
13322593348eSBarry Smith 
13334e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
13342593348eSBarry Smith {
1335b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13364e220ebcSLois Curfman McInnes 
13374e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->m;
13384e220ebcSLois Curfman McInnes   info->columns_global = (double)a->n;
13394e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
13404e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->n;
13414e220ebcSLois Curfman McInnes   info->block_size     = a->bs2;
13424e220ebcSLois Curfman McInnes   info->nz_allocated   = a->maxnz;
13434e220ebcSLois Curfman McInnes   info->nz_used        = a->bs2*a->nz;
13444e220ebcSLois Curfman McInnes   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
13454e220ebcSLois Curfman McInnes   /*  if (info->nz_unneeded != A->info.nz_unneeded)
13464e220ebcSLois Curfman McInnes     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
13474e220ebcSLois Curfman McInnes   info->assemblies   = A->num_ass;
13484e220ebcSLois Curfman McInnes   info->mallocs      = a->reallocs;
13494e220ebcSLois Curfman McInnes   info->memory       = A->mem;
13504e220ebcSLois Curfman McInnes   if (A->factor) {
13514e220ebcSLois Curfman McInnes     info->fill_ratio_given  = A->info.fill_ratio_given;
13524e220ebcSLois Curfman McInnes     info->fill_ratio_needed = A->info.fill_ratio_needed;
13534e220ebcSLois Curfman McInnes     info->factor_mallocs    = A->info.factor_mallocs;
13544e220ebcSLois Curfman McInnes   } else {
13554e220ebcSLois Curfman McInnes     info->fill_ratio_given  = 0;
13564e220ebcSLois Curfman McInnes     info->fill_ratio_needed = 0;
13574e220ebcSLois Curfman McInnes     info->factor_mallocs    = 0;
13584e220ebcSLois Curfman McInnes   }
13592593348eSBarry Smith   return 0;
13602593348eSBarry Smith }
13612593348eSBarry Smith 
136291d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
136391d316f6SSatish Balay {
136491d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
136591d316f6SSatish Balay 
136691d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
136791d316f6SSatish Balay 
136891d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
136991d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1370a7c10996SSatish Balay       (a->nz != b->nz)) {
137191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137291d316f6SSatish Balay   }
137391d316f6SSatish Balay 
137491d316f6SSatish Balay   /* if the a->i are the same */
137591d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
137691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137791d316f6SSatish Balay   }
137891d316f6SSatish Balay 
137991d316f6SSatish Balay   /* if a->j are the same */
138091d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
138191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138291d316f6SSatish Balay   }
138391d316f6SSatish Balay 
138491d316f6SSatish Balay   /* if a->a are the same */
138591d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
138691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138791d316f6SSatish Balay   }
138891d316f6SSatish Balay   *flg = PETSC_TRUE;
138991d316f6SSatish Balay   return 0;
139091d316f6SSatish Balay 
139191d316f6SSatish Balay }
139291d316f6SSatish Balay 
139391d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
139491d316f6SSatish Balay {
139591d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13967e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
139717e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
139817e48fc4SSatish Balay 
139917e48fc4SSatish Balay   bs   = a->bs;
140017e48fc4SSatish Balay   aa   = a->a;
140117e48fc4SSatish Balay   ai   = a->i;
140217e48fc4SSatish Balay   aj   = a->j;
140317e48fc4SSatish Balay   ambs = a->mbs;
14049df24120SSatish Balay   bs2  = a->bs2;
140591d316f6SSatish Balay 
140691d316f6SSatish Balay   VecSet(&zero,v);
140791d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
14089867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
140917e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
141017e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
141117e48fc4SSatish Balay       if (aj[j] == i) {
141217e48fc4SSatish Balay         row  = i*bs;
14137e67e3f9SSatish Balay         aa_j = aa+j*bs2;
14147e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
141591d316f6SSatish Balay         break;
141691d316f6SSatish Balay       }
141791d316f6SSatish Balay     }
141891d316f6SSatish Balay   }
141991d316f6SSatish Balay   return 0;
142091d316f6SSatish Balay }
142191d316f6SSatish Balay 
14229867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14239867e207SSatish Balay {
14249867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14259867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14267e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14279867e207SSatish Balay 
14289867e207SSatish Balay   ai  = a->i;
14299867e207SSatish Balay   aj  = a->j;
14309867e207SSatish Balay   aa  = a->a;
14319867e207SSatish Balay   m   = a->m;
14329867e207SSatish Balay   n   = a->n;
14339867e207SSatish Balay   bs  = a->bs;
14349867e207SSatish Balay   mbs = a->mbs;
14359df24120SSatish Balay   bs2 = a->bs2;
14369867e207SSatish Balay   if (ll) {
14379867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
14389867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14399867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14409867e207SSatish Balay       M  = ai[i+1] - ai[i];
14419867e207SSatish Balay       li = l + i*bs;
14427e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14439867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14447e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14459867e207SSatish Balay           (*v++) *= li[k%bs];
14469867e207SSatish Balay         }
14479867e207SSatish Balay       }
14489867e207SSatish Balay     }
14499867e207SSatish Balay   }
14509867e207SSatish Balay 
14519867e207SSatish Balay   if (rr) {
14529867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
14539867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14549867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14559867e207SSatish Balay       M  = ai[i+1] - ai[i];
14567e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14579867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14589867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14599867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14609867e207SSatish Balay           x = ri[k];
14619867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14629867e207SSatish Balay         }
14639867e207SSatish Balay       }
14649867e207SSatish Balay     }
14659867e207SSatish Balay   }
14669867e207SSatish Balay   return 0;
14679867e207SSatish Balay }
14689867e207SSatish Balay 
14699867e207SSatish Balay 
1470b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1471b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14722a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1473736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1474736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14751a6a6d98SBarry Smith 
14761a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14771a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14781a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14791a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14801a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14811a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14821a6a6d98SBarry Smith 
14837fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14847fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14857fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14867fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14877fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14887fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14892593348eSBarry Smith 
1490b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14912593348eSBarry Smith {
1492b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14932593348eSBarry Smith   Scalar      *v = a->a;
14942593348eSBarry Smith   double      sum = 0.0;
14959df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
14962593348eSBarry Smith 
14972593348eSBarry Smith   if (type == NORM_FROBENIUS) {
14980419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
14992593348eSBarry Smith #if defined(PETSC_COMPLEX)
15002593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
15012593348eSBarry Smith #else
15022593348eSBarry Smith       sum += (*v)*(*v); v++;
15032593348eSBarry Smith #endif
15042593348eSBarry Smith     }
15052593348eSBarry Smith     *norm = sqrt(sum);
15062593348eSBarry Smith   }
15072593348eSBarry Smith   else {
1508b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
15092593348eSBarry Smith   }
15102593348eSBarry Smith   return 0;
15112593348eSBarry Smith }
15122593348eSBarry Smith 
15132593348eSBarry Smith /*
15142593348eSBarry Smith      note: This can only work for identity for row and col. It would
15152593348eSBarry Smith    be good to check this and otherwise generate an error.
15162593348eSBarry Smith */
1517b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15182593348eSBarry Smith {
1519b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15202593348eSBarry Smith   Mat         outA;
1521de6a44a3SBarry Smith   int         ierr;
15222593348eSBarry Smith 
1523b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15242593348eSBarry Smith 
15252593348eSBarry Smith   outA          = inA;
15262593348eSBarry Smith   inA->factor   = FACTOR_LU;
15272593348eSBarry Smith   a->row        = row;
15282593348eSBarry Smith   a->col        = col;
15292593348eSBarry Smith 
1530de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15312593348eSBarry Smith 
15322593348eSBarry Smith   if (!a->diag) {
1533b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15342593348eSBarry Smith   }
15357fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15362593348eSBarry Smith   return 0;
15372593348eSBarry Smith }
15382593348eSBarry Smith 
1539b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15402593348eSBarry Smith {
1541b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15429df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1543b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1544b6490206SBarry Smith   PLogFlops(totalnz);
15452593348eSBarry Smith   return 0;
15462593348eSBarry Smith }
15472593348eSBarry Smith 
15487e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15497e67e3f9SSatish Balay {
15507e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15517e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1552a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15539df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15547e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15557e67e3f9SSatish Balay 
15567e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15577e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15587e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15597e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1560a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15617e67e3f9SSatish Balay     nrow = ailen[brow];
15627e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15637e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15647e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1565a7c10996SSatish Balay       col  = in[l] ;
15667e67e3f9SSatish Balay       bcol = col/bs;
15677e67e3f9SSatish Balay       cidx = col%bs;
15687e67e3f9SSatish Balay       ridx = row%bs;
15697e67e3f9SSatish Balay       high = nrow;
15707e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15717e67e3f9SSatish Balay       while (high-low > 5) {
15727e67e3f9SSatish Balay         t = (low+high)/2;
15737e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15747e67e3f9SSatish Balay         else             low  = t;
15757e67e3f9SSatish Balay       }
15767e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15777e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15787e67e3f9SSatish Balay         if (rp[i] == bcol) {
15797e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15807e67e3f9SSatish Balay           goto finished;
15817e67e3f9SSatish Balay         }
15827e67e3f9SSatish Balay       }
15837e67e3f9SSatish Balay       *v++ = zero;
15847e67e3f9SSatish Balay       finished:;
15857e67e3f9SSatish Balay     }
15867e67e3f9SSatish Balay   }
15877e67e3f9SSatish Balay   return 0;
15887e67e3f9SSatish Balay }
15897e67e3f9SSatish Balay 
15905a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15915a838052SSatish Balay {
15925a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
15935a838052SSatish Balay   *bs = baij->bs;
15945a838052SSatish Balay   return 0;
15955a838052SSatish Balay }
15965a838052SSatish Balay 
1597d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1598d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1599d9b7c43dSSatish Balay {
1600d9b7c43dSSatish Balay   int i,row;
1601d9b7c43dSSatish Balay   row = idx[0];
1602d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1603d9b7c43dSSatish Balay 
1604d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1605d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1606d9b7c43dSSatish Balay   }
1607d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1608d9b7c43dSSatish Balay   return 0;
1609d9b7c43dSSatish Balay }
1610d9b7c43dSSatish Balay 
1611d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1612d9b7c43dSSatish Balay {
1613d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1614d9b7c43dSSatish Balay   IS          is_local;
1615d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1616d9b7c43dSSatish Balay   PetscTruth  flg;
1617d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1618d9b7c43dSSatish Balay 
1619d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1620d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1621d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1622537820f0SBarry Smith   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1623d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1624d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1625d9b7c43dSSatish Balay 
1626d9b7c43dSSatish Balay   i = 0;
1627d9b7c43dSSatish Balay   while (i < is_n) {
1628d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1629d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1630d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1631d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1632d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1633d9b7c43dSSatish Balay       i += bs;
1634d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1635d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1636d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1637d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1638d9b7c43dSSatish Balay         aa[0] = zero;
1639d9b7c43dSSatish Balay         aa+=bs;
1640d9b7c43dSSatish Balay       }
1641d9b7c43dSSatish Balay       i++;
1642d9b7c43dSSatish Balay     }
1643d9b7c43dSSatish Balay   }
1644d9b7c43dSSatish Balay   if (diag) {
1645d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1646d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1647d9b7c43dSSatish Balay     }
1648d9b7c43dSSatish Balay   }
1649d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1650d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1651d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1652d9b7c43dSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1653d9b7c43dSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1654d9b7c43dSSatish Balay 
1655d9b7c43dSSatish Balay   return 0;
1656d9b7c43dSSatish Balay }
1657ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1658ba4ca20aSSatish Balay {
1659ba4ca20aSSatish Balay   static int called = 0;
1660ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1661ba4ca20aSSatish Balay 
1662ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1663ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1664ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1665ba4ca20aSSatish Balay   return 0;
1666ba4ca20aSSatish Balay }
1667d9b7c43dSSatish Balay 
16682593348eSBarry Smith /* -------------------------------------------------------------------*/
1669cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16709867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1671f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1672faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16731a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1674b6490206SBarry Smith        0,0,
1675de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1676b6490206SBarry Smith        0,
1677f2501298SSatish Balay        MatTranspose_SeqBAIJ,
167817e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16799867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1680584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1681b6490206SBarry Smith        0,
1682d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
16837fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1684b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1685de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1686b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1687b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1688b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1689af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16907e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1691ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16923b2fbd54SBarry Smith        0,0,0,MatGetBlockSize_SeqBAIJ,
16933b2fbd54SBarry Smith        MatGetRowIJ_SeqBAIJ,
16943b2fbd54SBarry Smith        MatRestoreRowIJ_SeqBAIJ};
16952593348eSBarry Smith 
16962593348eSBarry Smith /*@C
169744cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
169844cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
169944cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
17002bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
17012bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
17022593348eSBarry Smith 
17032593348eSBarry Smith    Input Parameters:
17042593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1705b6490206SBarry Smith .  bs - size of block
17062593348eSBarry Smith .  m - number of rows
17072593348eSBarry Smith .  n - number of columns
1708b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
17092bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
17102bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
17112593348eSBarry Smith 
17122593348eSBarry Smith    Output Parameter:
17132593348eSBarry Smith .  A - the matrix
17142593348eSBarry Smith 
17152593348eSBarry Smith    Notes:
171644cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
17172593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
171844cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17192593348eSBarry Smith 
17202593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17212593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17222593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17232593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
17242593348eSBarry Smith 
172544cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17262593348eSBarry Smith @*/
1727b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17282593348eSBarry Smith {
17292593348eSBarry Smith   Mat         B;
1730b6490206SBarry Smith   Mat_SeqBAIJ *b;
17313b2fbd54SBarry Smith   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
17323b2fbd54SBarry Smith 
17333b2fbd54SBarry Smith   MPI_Comm_size(comm,&size);
17343b2fbd54SBarry Smith   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1735b6490206SBarry Smith 
1736f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1737f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17382593348eSBarry Smith 
17392593348eSBarry Smith   *A = 0;
1740b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17412593348eSBarry Smith   PLogObjectCreate(B);
1742b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
174344cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17442593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17451a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17461a6a6d98SBarry Smith   if (!flg) {
17477fc0212eSBarry Smith     switch (bs) {
17487fc0212eSBarry Smith       case 1:
17497fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17501a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
175139b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1752f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17537fc0212eSBarry Smith        break;
17544eeb42bcSBarry Smith       case 2:
17554eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17561a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
175739b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1758f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17596d84be18SBarry Smith         break;
1760f327dd97SBarry Smith       case 3:
1761f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17621a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
176339b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1764f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17654eeb42bcSBarry Smith         break;
17666d84be18SBarry Smith       case 4:
17676d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17681a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
176939b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1770f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17716d84be18SBarry Smith         break;
17726d84be18SBarry Smith       case 5:
17736d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17741a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
177539b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1776f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17776d84be18SBarry Smith         break;
17787fc0212eSBarry Smith     }
17791a6a6d98SBarry Smith   }
1780b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1781b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17822593348eSBarry Smith   B->factor           = 0;
17832593348eSBarry Smith   B->lupivotthreshold = 1.0;
17842593348eSBarry Smith   b->row              = 0;
17852593348eSBarry Smith   b->col              = 0;
17862593348eSBarry Smith   b->reallocs         = 0;
17872593348eSBarry Smith 
178844cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
178944cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1790b6490206SBarry Smith   b->mbs     = mbs;
1791f2501298SSatish Balay   b->nbs     = nbs;
1792b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
17932593348eSBarry Smith   if (nnz == PETSC_NULL) {
1794119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
17952593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1796b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1797b6490206SBarry Smith     nz = nz*mbs;
17982593348eSBarry Smith   }
17992593348eSBarry Smith   else {
18002593348eSBarry Smith     nz = 0;
1801b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
18022593348eSBarry Smith   }
18032593348eSBarry Smith 
18042593348eSBarry Smith   /* allocate the matrix space */
18057e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
18062593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
18077e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
18087e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
18092593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
18102593348eSBarry Smith   b->i  = b->j + nz;
18112593348eSBarry Smith   b->singlemalloc = 1;
18122593348eSBarry Smith 
1813de6a44a3SBarry Smith   b->i[0] = 0;
1814b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
18152593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
18162593348eSBarry Smith   }
18172593348eSBarry Smith 
1818b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1819b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1820b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1821b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18222593348eSBarry Smith 
1823b6490206SBarry Smith   b->bs               = bs;
18249df24120SSatish Balay   b->bs2              = bs2;
1825b6490206SBarry Smith   b->mbs              = mbs;
18262593348eSBarry Smith   b->nz               = 0;
182718e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18282593348eSBarry Smith   b->sorted           = 0;
18292593348eSBarry Smith   b->roworiented      = 1;
18302593348eSBarry Smith   b->nonew            = 0;
18312593348eSBarry Smith   b->diag             = 0;
18322593348eSBarry Smith   b->solve_work       = 0;
1833de6a44a3SBarry Smith   b->mult_work        = 0;
18342593348eSBarry Smith   b->spptr            = 0;
18354e220ebcSLois Curfman McInnes   B->info.nz_unneeded = (double)b->maxnz;
18364e220ebcSLois Curfman McInnes 
18372593348eSBarry Smith   *A = B;
18382593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18392593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18402593348eSBarry Smith   return 0;
18412593348eSBarry Smith }
18422593348eSBarry Smith 
1843b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18442593348eSBarry Smith {
18452593348eSBarry Smith   Mat         C;
1846b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18479df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1848de6a44a3SBarry Smith 
1849de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18502593348eSBarry Smith 
18512593348eSBarry Smith   *B = 0;
1852b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18532593348eSBarry Smith   PLogObjectCreate(C);
1854b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18552593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1856b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1857b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18582593348eSBarry Smith   C->factor     = A->factor;
18592593348eSBarry Smith   c->row        = 0;
18602593348eSBarry Smith   c->col        = 0;
18612593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18622593348eSBarry Smith 
186344cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
186444cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
186544cd7ae7SLois Curfman McInnes   C->M          = a->m;
186644cd7ae7SLois Curfman McInnes   C->N          = a->n;
186744cd7ae7SLois Curfman McInnes 
1868b6490206SBarry Smith   c->bs         = a->bs;
18699df24120SSatish Balay   c->bs2        = a->bs2;
1870b6490206SBarry Smith   c->mbs        = a->mbs;
1871b6490206SBarry Smith   c->nbs        = a->nbs;
18722593348eSBarry Smith 
1873b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1874b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1875b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18762593348eSBarry Smith     c->imax[i] = a->imax[i];
18772593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18782593348eSBarry Smith   }
18792593348eSBarry Smith 
18802593348eSBarry Smith   /* allocate the matrix space */
18812593348eSBarry Smith   c->singlemalloc = 1;
18827e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18832593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18847e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1885de6a44a3SBarry Smith   c->i  = c->j + nz;
1886b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1887b6490206SBarry Smith   if (mbs > 0) {
1888de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18892593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18907e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18912593348eSBarry Smith     }
18922593348eSBarry Smith   }
18932593348eSBarry Smith 
1894b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
18952593348eSBarry Smith   c->sorted      = a->sorted;
18962593348eSBarry Smith   c->roworiented = a->roworiented;
18972593348eSBarry Smith   c->nonew       = a->nonew;
18982593348eSBarry Smith 
18992593348eSBarry Smith   if (a->diag) {
1900b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1901b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1902b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
19032593348eSBarry Smith       c->diag[i] = a->diag[i];
19042593348eSBarry Smith     }
19052593348eSBarry Smith   }
19062593348eSBarry Smith   else c->diag          = 0;
19072593348eSBarry Smith   c->nz                 = a->nz;
19082593348eSBarry Smith   c->maxnz              = a->maxnz;
19092593348eSBarry Smith   c->solve_work         = 0;
19102593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
19117fc0212eSBarry Smith   c->mult_work          = 0;
19122593348eSBarry Smith   *B = C;
19132593348eSBarry Smith   return 0;
19142593348eSBarry Smith }
19152593348eSBarry Smith 
191619bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
19172593348eSBarry Smith {
1918b6490206SBarry Smith   Mat_SeqBAIJ  *a;
19192593348eSBarry Smith   Mat          B;
1920de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1921b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
192235aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1923a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1924b6490206SBarry Smith   Scalar       *aa;
192519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19262593348eSBarry Smith 
19271a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1928de6a44a3SBarry Smith   bs2  = bs*bs;
1929b6490206SBarry Smith 
19302593348eSBarry Smith   MPI_Comm_size(comm,&size);
1931b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
193290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
193377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1934de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19352593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19362593348eSBarry Smith 
1937b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
193835aab85fSBarry Smith 
193935aab85fSBarry Smith   /*
194035aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
194135aab85fSBarry Smith     divisible by the blocksize
194235aab85fSBarry Smith   */
1943b6490206SBarry Smith   mbs        = M/bs;
194435aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
194535aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
194635aab85fSBarry Smith   else                  mbs++;
194735aab85fSBarry Smith   if (extra_rows) {
1948537820f0SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
194935aab85fSBarry Smith   }
1950b6490206SBarry Smith 
19512593348eSBarry Smith   /* read in row lengths */
195235aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
195377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
195435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19552593348eSBarry Smith 
1956b6490206SBarry Smith   /* read in column indices */
195735aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
195877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
195935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1960b6490206SBarry Smith 
1961b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1962b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1963b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
196435aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
196535aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
196635aab85fSBarry Smith   masked = mask + mbs;
1967b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1968b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
196935aab85fSBarry Smith     nmask = 0;
1970b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1971b6490206SBarry Smith       kmax = rowlengths[rowcount];
1972b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
197335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
197435aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1975b6490206SBarry Smith       }
1976b6490206SBarry Smith       rowcount++;
1977b6490206SBarry Smith     }
197835aab85fSBarry Smith     browlengths[i] += nmask;
197935aab85fSBarry Smith     /* zero out the mask elements we set */
198035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1981b6490206SBarry Smith   }
1982b6490206SBarry Smith 
19832593348eSBarry Smith   /* create our matrix */
198435aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
198535aab85fSBarry Smith          CHKERRQ(ierr);
19862593348eSBarry Smith   B = *A;
1987b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19882593348eSBarry Smith 
19892593348eSBarry Smith   /* set matrix "i" values */
1990de6a44a3SBarry Smith   a->i[0] = 0;
1991b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1992b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1993b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
19942593348eSBarry Smith   }
1995b6490206SBarry Smith   a->nz         = 0;
1996b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
19972593348eSBarry Smith 
1998b6490206SBarry Smith   /* read in nonzero values */
199935aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
200077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
200135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2002b6490206SBarry Smith 
2003b6490206SBarry Smith   /* set "a" and "j" values into matrix */
2004b6490206SBarry Smith   nzcount = 0; jcount = 0;
2005b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
2006b6490206SBarry Smith     nzcountb = nzcount;
200735aab85fSBarry Smith     nmask    = 0;
2008b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2009b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2010b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
201135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
201235aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2013b6490206SBarry Smith       }
2014b6490206SBarry Smith       rowcount++;
2015b6490206SBarry Smith     }
2016de6a44a3SBarry Smith     /* sort the masked values */
201777c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2018de6a44a3SBarry Smith 
2019b6490206SBarry Smith     /* set "j" values into matrix */
2020b6490206SBarry Smith     maskcount = 1;
202135aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
202235aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2023de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2024b6490206SBarry Smith     }
2025b6490206SBarry Smith     /* set "a" values into matrix */
2026de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2027b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2028b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2029b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2030de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2031de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2032de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2033de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2034b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2035b6490206SBarry Smith       }
2036b6490206SBarry Smith     }
203735aab85fSBarry Smith     /* zero out the mask elements we set */
203835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2039b6490206SBarry Smith   }
204035aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2041b6490206SBarry Smith 
2042b6490206SBarry Smith   PetscFree(rowlengths);
2043b6490206SBarry Smith   PetscFree(browlengths);
2044b6490206SBarry Smith   PetscFree(aa);
2045b6490206SBarry Smith   PetscFree(jj);
2046b6490206SBarry Smith   PetscFree(mask);
2047b6490206SBarry Smith 
2048b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2049de6a44a3SBarry Smith 
2050de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2051de6a44a3SBarry Smith   if (flg) {
205219bcc07fSBarry Smith     Viewer tviewer;
205319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2054*639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
205519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
205619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2057de6a44a3SBarry Smith   }
2058de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2059de6a44a3SBarry Smith   if (flg) {
206019bcc07fSBarry Smith     Viewer tviewer;
206119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2062*639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
206319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065de6a44a3SBarry Smith   }
2066de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2067de6a44a3SBarry Smith   if (flg) {
206819bcc07fSBarry Smith     Viewer tviewer;
206919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
207019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
207119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2072de6a44a3SBarry Smith   }
2073de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2074de6a44a3SBarry Smith   if (flg) {
207519bcc07fSBarry Smith     Viewer tviewer;
207619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2077*639f9d9dSBarry Smith     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
207819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
207919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2080de6a44a3SBarry Smith   }
2081de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2082de6a44a3SBarry Smith   if (flg) {
208319bcc07fSBarry Smith     Viewer tviewer;
208419bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
208519bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
208619bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
208719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2088de6a44a3SBarry Smith   }
20892593348eSBarry Smith   return 0;
20902593348eSBarry Smith }
20912593348eSBarry Smith 
20922593348eSBarry Smith 
20932593348eSBarry Smith 
2094