xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 70f55243aafb320636e2a54ff30cab5d1e8d3d7b)
12593348eSBarry Smith #ifndef lint
2*70f55243SBarry Smith static char vcid[] = "$Id: baij.c,v 1.64 1996/08/06 04:02:46 bsmith Exp bsmith $";
32593348eSBarry Smith #endif
42593348eSBarry Smith 
52593348eSBarry Smith /*
6b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
72593348eSBarry Smith   matrix storage format.
82593348eSBarry Smith */
9*70f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h"
101a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
111a6a6d98SBarry Smith #include "src/inline/spops.h"
122593348eSBarry Smith #include "petsc.h"
133b547af2SSatish Balay 
14bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
152593348eSBarry Smith 
16a2ce50c7SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
172593348eSBarry Smith {
18b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
19bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
202593348eSBarry Smith 
212593348eSBarry Smith   /*
222593348eSBarry Smith      this is tacky: In the future when we have written special factorization
232593348eSBarry Smith      and solve routines for the identity permutation we should use a
242593348eSBarry Smith      stride index set instead of the general one.
252593348eSBarry Smith   */
262593348eSBarry Smith   if (type  == ORDER_NATURAL) {
272593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
282593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
292593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
312593348eSBarry Smith     PetscFree(idx);
322593348eSBarry Smith     ISSetPermutation(*rperm);
332593348eSBarry Smith     ISSetPermutation(*cperm);
342593348eSBarry Smith     ISSetIdentity(*rperm);
352593348eSBarry Smith     ISSetIdentity(*cperm);
362593348eSBarry Smith     return 0;
372593348eSBarry Smith   }
382593348eSBarry Smith 
39bcd2baecSBarry Smith   MatReorderingRegisterAll();
40a7c10996SSatish Balay   ishift = 0;
41bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
42bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
431a6a6d98SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
441a6a6d98SBarry Smith     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
452593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
46bcd2baecSBarry Smith   } else {
47bcd2baecSBarry Smith     if (ishift == oshift) {
481a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
49bcd2baecSBarry Smith     }
50bcd2baecSBarry Smith     else if (ishift == -1) {
51bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
521a6a6d98SBarry Smith       int nz = a->i[n] - 1;
53bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
541a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
551a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
56bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
571a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
58bcd2baecSBarry Smith     } else {
59bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
601a6a6d98SBarry Smith       int nz = a->i[n] - 1;
61bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
621a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
631a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
64bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
651a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
66bcd2baecSBarry Smith     }
67bcd2baecSBarry Smith   }
682593348eSBarry Smith   return 0;
692593348eSBarry Smith }
702593348eSBarry Smith 
71de6a44a3SBarry Smith /*
72de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
73de6a44a3SBarry Smith */
74de6a44a3SBarry Smith 
75de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
76de6a44a3SBarry Smith {
77de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
787fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
79de6a44a3SBarry Smith 
80de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
81de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
827fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
83de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
84de6a44a3SBarry Smith       if (a->j[j] == i) {
85de6a44a3SBarry Smith         diag[i] = j;
86de6a44a3SBarry Smith         break;
87de6a44a3SBarry Smith       }
88de6a44a3SBarry Smith     }
89de6a44a3SBarry Smith   }
90de6a44a3SBarry Smith   a->diag = diag;
91de6a44a3SBarry Smith   return 0;
92de6a44a3SBarry Smith }
932593348eSBarry Smith 
942593348eSBarry Smith #include "draw.h"
952593348eSBarry Smith #include "pinclude/pviewer.h"
9677c4ece6SBarry Smith #include "sys.h"
972593348eSBarry Smith 
98b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
992593348eSBarry Smith {
100b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1019df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
102b6490206SBarry Smith   Scalar      *aa;
1032593348eSBarry Smith 
10490ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1052593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1062593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1072593348eSBarry Smith   col_lens[1] = a->m;
1082593348eSBarry Smith   col_lens[2] = a->n;
1097e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1102593348eSBarry Smith 
1112593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
112b6490206SBarry Smith   count = 0;
113b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
114b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
115b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
116b6490206SBarry Smith     }
1172593348eSBarry Smith   }
11877c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1192593348eSBarry Smith   PetscFree(col_lens);
1202593348eSBarry Smith 
1212593348eSBarry Smith   /* store column indices (zero start index) */
1227e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
123b6490206SBarry Smith   count = 0;
124b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
125b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
126b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
127b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
128b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1292593348eSBarry Smith         }
1302593348eSBarry Smith       }
131b6490206SBarry Smith     }
132b6490206SBarry Smith   }
1337e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
134b6490206SBarry Smith   PetscFree(jj);
1352593348eSBarry Smith 
1362593348eSBarry Smith   /* store nonzero values */
1377e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
138b6490206SBarry Smith   count = 0;
139b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
140b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
141b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
142b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1437e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
144b6490206SBarry Smith         }
145b6490206SBarry Smith       }
146b6490206SBarry Smith     }
147b6490206SBarry Smith   }
1487e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
149b6490206SBarry Smith   PetscFree(aa);
1502593348eSBarry Smith   return 0;
1512593348eSBarry Smith }
1522593348eSBarry Smith 
153b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1542593348eSBarry Smith {
155b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1569df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1572593348eSBarry Smith   FILE        *fd;
1582593348eSBarry Smith   char        *outputname;
1592593348eSBarry Smith 
16090ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1612593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16290ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1637ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1647ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1652593348eSBarry Smith   }
16690ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
167b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1682593348eSBarry Smith   }
16944cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17044cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17144cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17244cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17344cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17444cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17544cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17644cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17744cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17844cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
17944cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18044cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes #else
18244cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18344cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18444cd7ae7SLois Curfman McInnes #endif
18544cd7ae7SLois Curfman McInnes           }
18644cd7ae7SLois Curfman McInnes         }
18744cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18844cd7ae7SLois Curfman McInnes       }
18944cd7ae7SLois Curfman McInnes     }
19044cd7ae7SLois Curfman McInnes   }
1912593348eSBarry Smith   else {
192b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
193b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
194b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
195b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
196b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19788685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1987e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
19988685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2007e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20188685aaeSLois Curfman McInnes           }
20288685aaeSLois Curfman McInnes           else {
2037e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20488685aaeSLois Curfman McInnes           }
20588685aaeSLois Curfman McInnes #else
2067e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20788685aaeSLois Curfman McInnes #endif
2082593348eSBarry Smith           }
2092593348eSBarry Smith         }
2102593348eSBarry Smith         fprintf(fd,"\n");
2112593348eSBarry Smith       }
2122593348eSBarry Smith     }
213b6490206SBarry Smith   }
2142593348eSBarry Smith   fflush(fd);
2152593348eSBarry Smith   return 0;
2162593348eSBarry Smith }
2172593348eSBarry Smith 
2183270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2193270192aSSatish Balay {
2203270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2213270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2223270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2233270192aSSatish Balay   Scalar       *aa;
2243270192aSSatish Balay   Draw         draw;
2253270192aSSatish Balay   DrawButton   button;
2263270192aSSatish Balay   PetscTruth   isnull;
2273270192aSSatish Balay 
2283270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2293270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2303270192aSSatish Balay 
2313270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2323270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2333270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2343270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2353270192aSSatish Balay   color = DRAW_BLUE;
2363270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2373270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2383270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2393270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2403270192aSSatish Balay       aa = a->a + j*bs2;
2413270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2423270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2433270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2443270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2453270192aSSatish Balay         }
2463270192aSSatish Balay       }
2473270192aSSatish Balay     }
2483270192aSSatish Balay   }
2493270192aSSatish Balay   color = DRAW_CYAN;
2503270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2513270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2523270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2533270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2543270192aSSatish Balay       aa = a->a + j*bs2;
2553270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2563270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2573270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2583270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2593270192aSSatish Balay         }
2603270192aSSatish Balay       }
2613270192aSSatish Balay     }
2623270192aSSatish Balay   }
2633270192aSSatish Balay 
2643270192aSSatish Balay   color = DRAW_RED;
2653270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2663270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2673270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2683270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2693270192aSSatish Balay       aa = a->a + j*bs2;
2703270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2713270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2723270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2733270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2743270192aSSatish Balay         }
2753270192aSSatish Balay       }
2763270192aSSatish Balay     }
2773270192aSSatish Balay   }
2783270192aSSatish Balay 
2793270192aSSatish Balay   DrawFlush(draw);
2803270192aSSatish Balay   DrawGetPause(draw,&pause);
2813270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2823270192aSSatish Balay 
2833270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2843270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2853270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2863270192aSSatish Balay     DrawClear(draw);
2873270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2883270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2893270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2903270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2913270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2923270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2933270192aSSatish Balay     w *= scale; h *= scale;
2943270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2953270192aSSatish Balay     color = DRAW_BLUE;
2963270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2973270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2983270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2993270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3003270192aSSatish Balay         aa = a->a + j*bs2;
3013270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3023270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3033270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
3043270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3053270192aSSatish Balay           }
3063270192aSSatish Balay         }
3073270192aSSatish Balay       }
3083270192aSSatish Balay     }
3093270192aSSatish Balay     color = DRAW_CYAN;
3103270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3113270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3123270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3133270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3143270192aSSatish Balay         aa = a->a + j*bs2;
3153270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3163270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3173270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3183270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3193270192aSSatish Balay           }
3203270192aSSatish Balay         }
3213270192aSSatish Balay       }
3223270192aSSatish Balay     }
3233270192aSSatish Balay 
3243270192aSSatish Balay     color = DRAW_RED;
3253270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3263270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3273270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3283270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3293270192aSSatish Balay         aa = a->a + j*bs2;
3303270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3313270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3323270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3333270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3343270192aSSatish Balay           }
3353270192aSSatish Balay         }
3363270192aSSatish Balay       }
3373270192aSSatish Balay     }
3383270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3393270192aSSatish Balay   }
3403270192aSSatish Balay   return 0;
3413270192aSSatish Balay }
3423270192aSSatish Balay 
343b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3442593348eSBarry Smith {
3452593348eSBarry Smith   Mat         A = (Mat) obj;
34619bcc07fSBarry Smith   ViewerType  vtype;
34719bcc07fSBarry Smith   int         ierr;
3482593348eSBarry Smith 
34919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
351b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3522593348eSBarry Smith   }
35319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
354b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3552593348eSBarry Smith   }
35619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
357b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3582593348eSBarry Smith   }
35919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3603270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3612593348eSBarry Smith   }
3622593348eSBarry Smith   return 0;
3632593348eSBarry Smith }
364b6490206SBarry Smith 
365119a934aSSatish Balay #define CHUNKSIZE  10
366cd0e1443SSatish Balay 
367cd0e1443SSatish Balay /* This version has row oriented v  */
368cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
369cd0e1443SSatish Balay {
370cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
371cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
372cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
373a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3749df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
375cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
376cd0e1443SSatish Balay 
377cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
378cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
379cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
380cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
381a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
382cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
383cd0e1443SSatish Balay     low = 0;
384cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
385cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
386cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
387a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
388cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
389cd0e1443SSatish Balay       if (roworiented) {
390cd0e1443SSatish Balay         value = *v++;
391cd0e1443SSatish Balay       }
392cd0e1443SSatish Balay       else {
393cd0e1443SSatish Balay         value = v[k + l*m];
394cd0e1443SSatish Balay       }
395cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
396cd0e1443SSatish Balay       while (high-low > 5) {
397cd0e1443SSatish Balay         t = (low+high)/2;
398cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
399cd0e1443SSatish Balay         else             low  = t;
400cd0e1443SSatish Balay       }
401cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
402cd0e1443SSatish Balay         if (rp[i] > bcol) break;
403cd0e1443SSatish Balay         if (rp[i] == bcol) {
4047e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
405cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
406cd0e1443SSatish Balay           else                  *bap  = value;
407cd0e1443SSatish Balay           goto noinsert;
408cd0e1443SSatish Balay         }
409cd0e1443SSatish Balay       }
410cd0e1443SSatish Balay       if (nonew) goto noinsert;
411cd0e1443SSatish Balay       if (nrow >= rmax) {
412cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
413cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
414cd0e1443SSatish Balay         Scalar *new_a;
415cd0e1443SSatish Balay 
416cd0e1443SSatish Balay         /* malloc new storage space */
4177e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
418cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4197e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
420cd0e1443SSatish Balay         new_i   = new_j + new_nz;
421cd0e1443SSatish Balay 
422cd0e1443SSatish Balay         /* copy over old data into new slots */
423cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4247e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
425a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
426a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
427a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
428cd0e1443SSatish Balay                                                            len*sizeof(int));
429a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
430a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
431a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
432a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
433cd0e1443SSatish Balay         /* free up old matrix storage */
434cd0e1443SSatish Balay         PetscFree(a->a);
435cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
436cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
437cd0e1443SSatish Balay         a->singlemalloc = 1;
438cd0e1443SSatish Balay 
439a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
440cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4417e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
44218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
443cd0e1443SSatish Balay         a->reallocs++;
444119a934aSSatish Balay         a->nz++;
445cd0e1443SSatish Balay       }
4467e67e3f9SSatish Balay       N = nrow++ - 1;
447cd0e1443SSatish Balay       /* shift up all the later entries in this row */
448cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
449cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4507e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
451cd0e1443SSatish Balay       }
4527e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
453cd0e1443SSatish Balay       rp[i] = bcol;
4547e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
455cd0e1443SSatish Balay       noinsert:;
456cd0e1443SSatish Balay       low = i;
457cd0e1443SSatish Balay     }
458cd0e1443SSatish Balay     ailen[brow] = nrow;
459cd0e1443SSatish Balay   }
460cd0e1443SSatish Balay   return 0;
461cd0e1443SSatish Balay }
462cd0e1443SSatish Balay 
4630b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4640b824a48SSatish Balay {
4650b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4660b824a48SSatish Balay   *m = a->m; *n = a->n;
4670b824a48SSatish Balay   return 0;
4680b824a48SSatish Balay }
4690b824a48SSatish Balay 
4700b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4710b824a48SSatish Balay {
4720b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4730b824a48SSatish Balay   *m = 0; *n = a->m;
4740b824a48SSatish Balay   return 0;
4750b824a48SSatish Balay }
4760b824a48SSatish Balay 
4779867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4789867e207SSatish Balay {
4799867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4807e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4819867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4829867e207SSatish Balay 
4839867e207SSatish Balay   bs  = a->bs;
4849867e207SSatish Balay   ai  = a->i;
4859867e207SSatish Balay   aj  = a->j;
4869867e207SSatish Balay   aa  = a->a;
4879df24120SSatish Balay   bs2 = a->bs2;
4889867e207SSatish Balay 
4899867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4909867e207SSatish Balay 
4919867e207SSatish Balay   bn  = row/bs;   /* Block number */
4929867e207SSatish Balay   bp  = row % bs; /* Block Position */
4939867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4949867e207SSatish Balay   *nz = bs*M;
4959867e207SSatish Balay 
4969867e207SSatish Balay   if (v) {
4979867e207SSatish Balay     *v = 0;
4989867e207SSatish Balay     if (*nz) {
4999867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
5009867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5019867e207SSatish Balay         v_i  = *v + i*bs;
5027e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
5037e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
5049867e207SSatish Balay       }
5059867e207SSatish Balay     }
5069867e207SSatish Balay   }
5079867e207SSatish Balay 
5089867e207SSatish Balay   if (idx) {
5099867e207SSatish Balay     *idx = 0;
5109867e207SSatish Balay     if (*nz) {
5119867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5129867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5139867e207SSatish Balay         idx_i = *idx + i*bs;
5149867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5159867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5169867e207SSatish Balay       }
5179867e207SSatish Balay     }
5189867e207SSatish Balay   }
5199867e207SSatish Balay   return 0;
5209867e207SSatish Balay }
5219867e207SSatish Balay 
5229867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5239867e207SSatish Balay {
5249867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5259867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5269867e207SSatish Balay   return 0;
5279867e207SSatish Balay }
528b6490206SBarry Smith 
529f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
530f2501298SSatish Balay {
531f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
532f2501298SSatish Balay   Mat         C;
533f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5349df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
535f2501298SSatish Balay   Scalar      *array=a->a;
536f2501298SSatish Balay 
537f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
538f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
539f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
540f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
541a7c10996SSatish Balay 
542a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
543f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
544f2501298SSatish Balay   PetscFree(col);
545f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
546f2501298SSatish Balay   cols = rows + bs;
547f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
548f2501298SSatish Balay     cols[0] = i*bs;
549f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
550f2501298SSatish Balay     len = ai[i+1] - ai[i];
551f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
552f2501298SSatish Balay       rows[0] = (*aj++)*bs;
553f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
554f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
555f2501298SSatish Balay       array += bs2;
556f2501298SSatish Balay     }
557f2501298SSatish Balay   }
5581073c447SSatish Balay   PetscFree(rows);
559f2501298SSatish Balay 
5606d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5616d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
562f2501298SSatish Balay 
563f2501298SSatish Balay   if (B != PETSC_NULL) {
564f2501298SSatish Balay     *B = C;
565f2501298SSatish Balay   } else {
566f2501298SSatish Balay     /* This isn't really an in-place transpose */
567f2501298SSatish Balay     PetscFree(a->a);
568f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
569f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
570f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
571f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
572f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
573f2501298SSatish Balay     PetscFree(a);
574f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
575f2501298SSatish Balay     PetscHeaderDestroy(C);
576f2501298SSatish Balay   }
577f2501298SSatish Balay   return 0;
578f2501298SSatish Balay }
579f2501298SSatish Balay 
580f2501298SSatish Balay 
581584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
582584200bdSSatish Balay {
583584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
584584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
585a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5869df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
587584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
588584200bdSSatish Balay 
5896d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
590584200bdSSatish Balay 
591584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
592584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
593584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
594584200bdSSatish Balay     if (fshift) {
595a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
596584200bdSSatish Balay       N = ailen[i];
597584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
598584200bdSSatish Balay         ip[j-fshift] = ip[j];
5997e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
600584200bdSSatish Balay       }
601584200bdSSatish Balay     }
602584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
603584200bdSSatish Balay   }
604584200bdSSatish Balay   if (mbs) {
605584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
606584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
607584200bdSSatish Balay   }
608584200bdSSatish Balay   /* reset ilen and imax for each row */
609584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
610584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
611584200bdSSatish Balay   }
612a7c10996SSatish Balay   a->nz = ai[mbs];
613584200bdSSatish Balay 
614584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
615584200bdSSatish Balay   if (fshift && a->diag) {
616584200bdSSatish Balay     PetscFree(a->diag);
617584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
618584200bdSSatish Balay     a->diag = 0;
619584200bdSSatish Balay   }
62067790700SSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
621584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
622584200bdSSatish Balay            a->reallocs);
623584200bdSSatish Balay   return 0;
624584200bdSSatish Balay }
625584200bdSSatish Balay 
626b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6272593348eSBarry Smith {
628b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6299df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6302593348eSBarry Smith   return 0;
6312593348eSBarry Smith }
6322593348eSBarry Smith 
633b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6342593348eSBarry Smith {
6352593348eSBarry Smith   Mat         A  = (Mat) obj;
636b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6372593348eSBarry Smith 
6382593348eSBarry Smith #if defined(PETSC_LOG)
6392593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6402593348eSBarry Smith #endif
6412593348eSBarry Smith   PetscFree(a->a);
6422593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6432593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6442593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6452593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6462593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
647de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6482593348eSBarry Smith   PetscFree(a);
649f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
650f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6512593348eSBarry Smith   return 0;
6522593348eSBarry Smith }
6532593348eSBarry Smith 
654b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6552593348eSBarry Smith {
656b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6576d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6586d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6596d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
6606d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6616d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6626d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
6636d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6646d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
6656d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
66694a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6676d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6686d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6692593348eSBarry Smith   else
670b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6712593348eSBarry Smith   return 0;
6722593348eSBarry Smith }
6732593348eSBarry Smith 
6742593348eSBarry Smith 
6752593348eSBarry Smith /* -------------------------------------------------------*/
6762593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6772593348eSBarry Smith /* -------------------------------------------------------*/
678b6490206SBarry Smith #include "pinclude/plapack.h"
679b6490206SBarry Smith 
68039b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6812593348eSBarry Smith {
682b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
68339b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
684c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
6852593348eSBarry Smith 
686c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
687c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
688b6490206SBarry Smith 
689119a934aSSatish Balay   idx   = a->j;
690119a934aSSatish Balay   v     = a->a;
691119a934aSSatish Balay   ii    = a->i;
692119a934aSSatish Balay 
693119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
694119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
695119a934aSSatish Balay     sum  = 0.0;
696119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
6971a6a6d98SBarry Smith     z[i] = sum;
698119a934aSSatish Balay   }
699c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
700c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
70139b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
70239b95ed1SBarry Smith   return 0;
70339b95ed1SBarry Smith }
70439b95ed1SBarry Smith 
70539b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
70639b95ed1SBarry Smith {
70739b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
70839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
70939b95ed1SBarry Smith   register Scalar x1,x2;
71039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
711c16cb8f2SBarry Smith   int             j,n;
71239b95ed1SBarry Smith 
713c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
714c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
71539b95ed1SBarry Smith 
71639b95ed1SBarry Smith   idx   = a->j;
71739b95ed1SBarry Smith   v     = a->a;
71839b95ed1SBarry Smith   ii    = a->i;
71939b95ed1SBarry Smith 
720119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
721119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
722119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
723119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
724119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
725119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
726119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
727119a934aSSatish Balay       v += 4;
728119a934aSSatish Balay     }
7291a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
730119a934aSSatish Balay     z += 2;
731119a934aSSatish Balay   }
732c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
733c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
73439b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
73539b95ed1SBarry Smith   return 0;
73639b95ed1SBarry Smith }
73739b95ed1SBarry Smith 
73839b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
73939b95ed1SBarry Smith {
74039b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
74139b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
742c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
74339b95ed1SBarry Smith 
744c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
745c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
74639b95ed1SBarry Smith 
74739b95ed1SBarry Smith   idx   = a->j;
74839b95ed1SBarry Smith   v     = a->a;
74939b95ed1SBarry Smith   ii    = a->i;
75039b95ed1SBarry Smith 
751119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
752119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
753119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
754119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
755119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
756119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
757119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
758119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
759119a934aSSatish Balay       v += 9;
760119a934aSSatish Balay     }
7611a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
762119a934aSSatish Balay     z += 3;
763119a934aSSatish Balay   }
764c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
765c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
76639b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
76739b95ed1SBarry Smith   return 0;
76839b95ed1SBarry Smith }
76939b95ed1SBarry Smith 
77039b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
77139b95ed1SBarry Smith {
77239b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77339b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77439b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
77539b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
776c16cb8f2SBarry Smith   int             j,n;
77739b95ed1SBarry Smith 
778c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
779c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
78039b95ed1SBarry Smith 
78139b95ed1SBarry Smith   idx   = a->j;
78239b95ed1SBarry Smith   v     = a->a;
78339b95ed1SBarry Smith   ii    = a->i;
78439b95ed1SBarry Smith 
785119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
786119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
787119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
788119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
789119a934aSSatish Balay       xb = x + 4*(*idx++);
790119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
791119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
792119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
793119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
794119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
795119a934aSSatish Balay       v += 16;
796119a934aSSatish Balay     }
7971a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
798119a934aSSatish Balay     z += 4;
799119a934aSSatish Balay   }
800c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
801c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
80239b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80339b95ed1SBarry Smith   return 0;
80439b95ed1SBarry Smith }
80539b95ed1SBarry Smith 
80639b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
80739b95ed1SBarry Smith {
80839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
80939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
81039b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
811c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
81239b95ed1SBarry Smith 
813c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
814c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
81539b95ed1SBarry Smith 
81639b95ed1SBarry Smith   idx   = a->j;
81739b95ed1SBarry Smith   v     = a->a;
81839b95ed1SBarry Smith   ii    = a->i;
81939b95ed1SBarry Smith 
820119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
821119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
822119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
823119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
824119a934aSSatish Balay       xb = x + 5*(*idx++);
825119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
826119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
827119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
828119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
829119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
830119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
831119a934aSSatish Balay       v += 25;
832119a934aSSatish Balay     }
8331a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
834119a934aSSatish Balay     z += 5;
835119a934aSSatish Balay   }
836c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
837c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
83839b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
83939b95ed1SBarry Smith   return 0;
84039b95ed1SBarry Smith }
84139b95ed1SBarry Smith 
84239b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84339b95ed1SBarry Smith {
84439b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
84539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
846c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
847c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
848c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
84939b95ed1SBarry Smith 
850c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
851c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
85239b95ed1SBarry Smith 
85339b95ed1SBarry Smith   idx   = a->j;
85439b95ed1SBarry Smith   v     = a->a;
85539b95ed1SBarry Smith   ii    = a->i;
85639b95ed1SBarry Smith 
85739b95ed1SBarry Smith 
858119a934aSSatish Balay   if (!a->mult_work) {
8593b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8602b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
861119a934aSSatish Balay   }
862119a934aSSatish Balay   work = a->mult_work;
863119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
864119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
865119a934aSSatish Balay     ncols = n*bs;
866119a934aSSatish Balay     workt = work;
867119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
868119a934aSSatish Balay       xb = x + bs*(*idx++);
869119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
870119a934aSSatish Balay       workt += bs;
871119a934aSSatish Balay     }
8721a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
873119a934aSSatish Balay     v += n*bs2;
874119a934aSSatish Balay     z += bs;
875119a934aSSatish Balay   }
876c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
877c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
8781a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
879bb42667fSSatish Balay   return 0;
880bb42667fSSatish Balay }
881bb42667fSSatish Balay 
882f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
883f44d3a6dSSatish Balay {
884f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
885f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
886c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
887f44d3a6dSSatish Balay 
888c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
889c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
890c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
891f44d3a6dSSatish Balay 
892f44d3a6dSSatish Balay   idx   = a->j;
893f44d3a6dSSatish Balay   v     = a->a;
894f44d3a6dSSatish Balay   ii    = a->i;
895f44d3a6dSSatish Balay 
896f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
897f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
898f44d3a6dSSatish Balay     sum  = y[i];
899f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
900f44d3a6dSSatish Balay     z[i] = sum;
901f44d3a6dSSatish Balay   }
902c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
903c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
904c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
905f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
906f44d3a6dSSatish Balay   return 0;
907f44d3a6dSSatish Balay }
908f44d3a6dSSatish Balay 
909f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
910f44d3a6dSSatish Balay {
911f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
912f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
913f44d3a6dSSatish Balay   register Scalar x1,x2;
914f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
915c16cb8f2SBarry Smith   int             j,n;
916f44d3a6dSSatish Balay 
917c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
918c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
919c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
920f44d3a6dSSatish Balay 
921f44d3a6dSSatish Balay   idx   = a->j;
922f44d3a6dSSatish Balay   v     = a->a;
923f44d3a6dSSatish Balay   ii    = a->i;
924f44d3a6dSSatish Balay 
925f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
926f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
927f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
928f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
929f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
930f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
931f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
932f44d3a6dSSatish Balay       v += 4;
933f44d3a6dSSatish Balay     }
934f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
935f44d3a6dSSatish Balay     z += 2; y += 2;
936f44d3a6dSSatish Balay   }
937c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
938c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
939c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
940f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
941f44d3a6dSSatish Balay   return 0;
942f44d3a6dSSatish Balay }
943f44d3a6dSSatish Balay 
944f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
945f44d3a6dSSatish Balay {
946f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
947f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
948c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
949f44d3a6dSSatish Balay 
950c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
951c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
952c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
953f44d3a6dSSatish Balay 
954f44d3a6dSSatish Balay   idx   = a->j;
955f44d3a6dSSatish Balay   v     = a->a;
956f44d3a6dSSatish Balay   ii    = a->i;
957f44d3a6dSSatish Balay 
958f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
959f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
960f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
961f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
962f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
963f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
964f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
965f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
966f44d3a6dSSatish Balay       v += 9;
967f44d3a6dSSatish Balay     }
968f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
969f44d3a6dSSatish Balay     z += 3; y += 3;
970f44d3a6dSSatish Balay   }
971c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
972c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
973c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
974f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
975f44d3a6dSSatish Balay   return 0;
976f44d3a6dSSatish Balay }
977f44d3a6dSSatish Balay 
978f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
979f44d3a6dSSatish Balay {
980f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
981f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
982f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
983f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
984c16cb8f2SBarry Smith   int             j,n;
985f44d3a6dSSatish Balay 
986c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
987c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
988c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
989f44d3a6dSSatish Balay 
990f44d3a6dSSatish Balay   idx   = a->j;
991f44d3a6dSSatish Balay   v     = a->a;
992f44d3a6dSSatish Balay   ii    = a->i;
993f44d3a6dSSatish Balay 
994f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
995f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
996f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
997f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
998f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
999f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1000f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1001f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1002f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1003f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1004f44d3a6dSSatish Balay       v += 16;
1005f44d3a6dSSatish Balay     }
1006f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1007f44d3a6dSSatish Balay     z += 4; y += 4;
1008f44d3a6dSSatish Balay   }
1009c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1010c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1011c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1012f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1013f44d3a6dSSatish Balay   return 0;
1014f44d3a6dSSatish Balay }
1015f44d3a6dSSatish Balay 
1016f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1017f44d3a6dSSatish Balay {
1018f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1019f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1020f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1021c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1022f44d3a6dSSatish Balay 
1023c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1024c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1025c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1026f44d3a6dSSatish Balay 
1027f44d3a6dSSatish Balay   idx   = a->j;
1028f44d3a6dSSatish Balay   v     = a->a;
1029f44d3a6dSSatish Balay   ii    = a->i;
1030f44d3a6dSSatish Balay 
1031f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1032f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1033f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1034f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1035f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1036f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1037f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1038f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1039f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1040f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1041f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1042f44d3a6dSSatish Balay       v += 25;
1043f44d3a6dSSatish Balay     }
1044f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1045f44d3a6dSSatish Balay     z += 5; y += 5;
1046f44d3a6dSSatish Balay   }
1047c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1048c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1049c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1050f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1051f44d3a6dSSatish Balay   return 0;
1052f44d3a6dSSatish Balay }
1053f44d3a6dSSatish Balay 
1054f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1055f44d3a6dSSatish Balay {
1056f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1057f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1058f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1059f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1060f44d3a6dSSatish Balay 
1061f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1062f44d3a6dSSatish Balay 
1063c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1064c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1065f44d3a6dSSatish Balay 
1066f44d3a6dSSatish Balay   idx   = a->j;
1067f44d3a6dSSatish Balay   v     = a->a;
1068f44d3a6dSSatish Balay   ii    = a->i;
1069f44d3a6dSSatish Balay 
1070f44d3a6dSSatish Balay 
1071f44d3a6dSSatish Balay   if (!a->mult_work) {
1072f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1073f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1074f44d3a6dSSatish Balay   }
1075f44d3a6dSSatish Balay   work = a->mult_work;
1076f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1077f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1078f44d3a6dSSatish Balay     ncols = n*bs;
1079f44d3a6dSSatish Balay     workt = work;
1080f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1081f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1082f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1083f44d3a6dSSatish Balay       workt += bs;
1084f44d3a6dSSatish Balay     }
1085f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1086f44d3a6dSSatish Balay     v += n*bs2;
1087f44d3a6dSSatish Balay     z += bs;
1088f44d3a6dSSatish Balay   }
1089c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1090c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1091f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1092f44d3a6dSSatish Balay   return 0;
1093f44d3a6dSSatish Balay }
1094f44d3a6dSSatish Balay 
10951a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1096bb42667fSSatish Balay {
1097bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
10981a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1099bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1100bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11019df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1102119a934aSSatish Balay 
1103119a934aSSatish Balay 
1104bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1105bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1106bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1107bb42667fSSatish Balay 
1108119a934aSSatish Balay   idx   = a->j;
1109119a934aSSatish Balay   v     = a->a;
1110119a934aSSatish Balay   ii    = a->i;
1111119a934aSSatish Balay 
1112119a934aSSatish Balay   switch (bs) {
1113119a934aSSatish Balay   case 1:
1114119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1115119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1116119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1117119a934aSSatish Balay       ib = idx + ai[i];
1118119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1119bb1453f0SSatish Balay         rval    = ib[j];
1120bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1121119a934aSSatish Balay       }
1122119a934aSSatish Balay     }
1123119a934aSSatish Balay     break;
1124119a934aSSatish Balay   case 2:
1125119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1126119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1127119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1128119a934aSSatish Balay       ib = idx + ai[i];
1129119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1130119a934aSSatish Balay         rval      = ib[j]*2;
1131bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1132bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1133119a934aSSatish Balay         v += 4;
1134119a934aSSatish Balay       }
1135119a934aSSatish Balay     }
1136119a934aSSatish Balay     break;
1137119a934aSSatish Balay   case 3:
1138119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1139119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1140119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1141119a934aSSatish Balay       ib = idx + ai[i];
1142119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1143119a934aSSatish Balay         rval      = ib[j]*3;
1144bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1145bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1146bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1147119a934aSSatish Balay         v += 9;
1148119a934aSSatish Balay       }
1149119a934aSSatish Balay     }
1150119a934aSSatish Balay     break;
1151119a934aSSatish Balay   case 4:
1152119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1153119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1154119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1155119a934aSSatish Balay       ib = idx + ai[i];
1156119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1157119a934aSSatish Balay         rval      = ib[j]*4;
1158bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1159bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1160bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1161bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1162119a934aSSatish Balay         v += 16;
1163119a934aSSatish Balay       }
1164119a934aSSatish Balay     }
1165119a934aSSatish Balay     break;
1166119a934aSSatish Balay   case 5:
1167119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1168119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1169119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1170119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1171119a934aSSatish Balay       ib = idx + ai[i];
1172119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1173119a934aSSatish Balay         rval      = ib[j]*5;
1174bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1175bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1176bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1177bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1178bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1179119a934aSSatish Balay         v += 25;
1180119a934aSSatish Balay       }
1181119a934aSSatish Balay     }
1182119a934aSSatish Balay     break;
1183119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1184119a934aSSatish Balay     default: {
1185119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1186119a934aSSatish Balay       if (!a->mult_work) {
11873b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1188bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1189119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1190119a934aSSatish Balay       }
1191119a934aSSatish Balay       work = a->mult_work;
1192119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1193119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1194119a934aSSatish Balay         ncols = n*bs;
1195119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1196119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1197119a934aSSatish Balay         v += n*bs2;
1198119a934aSSatish Balay         x += bs;
1199119a934aSSatish Balay         workt = work;
1200119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1201119a934aSSatish Balay           zb = z + bs*(*idx++);
1202bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1203119a934aSSatish Balay           workt += bs;
1204119a934aSSatish Balay         }
1205119a934aSSatish Balay       }
1206119a934aSSatish Balay     }
1207119a934aSSatish Balay   }
12080419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12090419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1210faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1211faf6580fSSatish Balay   return 0;
1212faf6580fSSatish Balay }
1213faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214faf6580fSSatish Balay {
1215faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1216faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1217faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1218faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1219faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1220faf6580fSSatish Balay 
1221faf6580fSSatish Balay 
1222faf6580fSSatish Balay 
1223faf6580fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1224faf6580fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1225faf6580fSSatish Balay 
1226648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1227648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1228648c1d95SSatish Balay 
1229faf6580fSSatish Balay 
1230faf6580fSSatish Balay   idx   = a->j;
1231faf6580fSSatish Balay   v     = a->a;
1232faf6580fSSatish Balay   ii    = a->i;
1233faf6580fSSatish Balay 
1234faf6580fSSatish Balay   switch (bs) {
1235faf6580fSSatish Balay   case 1:
1236faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1237faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1238faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1239faf6580fSSatish Balay       ib = idx + ai[i];
1240faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1241faf6580fSSatish Balay         rval    = ib[j];
1242faf6580fSSatish Balay         z[rval] += *v++ * x1;
1243faf6580fSSatish Balay       }
1244faf6580fSSatish Balay     }
1245faf6580fSSatish Balay     break;
1246faf6580fSSatish Balay   case 2:
1247faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1248faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1249faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1250faf6580fSSatish Balay       ib = idx + ai[i];
1251faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1252faf6580fSSatish Balay         rval      = ib[j]*2;
1253faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1254faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1255faf6580fSSatish Balay         v += 4;
1256faf6580fSSatish Balay       }
1257faf6580fSSatish Balay     }
1258faf6580fSSatish Balay     break;
1259faf6580fSSatish Balay   case 3:
1260faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1261faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1262faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1263faf6580fSSatish Balay       ib = idx + ai[i];
1264faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1265faf6580fSSatish Balay         rval      = ib[j]*3;
1266faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1267faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1268faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1269faf6580fSSatish Balay         v += 9;
1270faf6580fSSatish Balay       }
1271faf6580fSSatish Balay     }
1272faf6580fSSatish Balay     break;
1273faf6580fSSatish Balay   case 4:
1274faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1275faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1276faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1277faf6580fSSatish Balay       ib = idx + ai[i];
1278faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1279faf6580fSSatish Balay         rval      = ib[j]*4;
1280faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1281faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1282faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1283faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1284faf6580fSSatish Balay         v += 16;
1285faf6580fSSatish Balay       }
1286faf6580fSSatish Balay     }
1287faf6580fSSatish Balay     break;
1288faf6580fSSatish Balay   case 5:
1289faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1290faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1291faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1292faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1293faf6580fSSatish Balay       ib = idx + ai[i];
1294faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1295faf6580fSSatish Balay         rval      = ib[j]*5;
1296faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1297faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1298faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1299faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1300faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1301faf6580fSSatish Balay         v += 25;
1302faf6580fSSatish Balay       }
1303faf6580fSSatish Balay     }
1304faf6580fSSatish Balay     break;
1305faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1306faf6580fSSatish Balay     default: {
1307faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1308faf6580fSSatish Balay       if (!a->mult_work) {
1309faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1310faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1311faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1312faf6580fSSatish Balay       }
1313faf6580fSSatish Balay       work = a->mult_work;
1314faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1315faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1316faf6580fSSatish Balay         ncols = n*bs;
1317faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1318faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1319faf6580fSSatish Balay         v += n*bs2;
1320faf6580fSSatish Balay         x += bs;
1321faf6580fSSatish Balay         workt = work;
1322faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1323faf6580fSSatish Balay           zb = z + bs*(*idx++);
1324faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1325faf6580fSSatish Balay           workt += bs;
1326faf6580fSSatish Balay         }
1327faf6580fSSatish Balay       }
1328faf6580fSSatish Balay     }
1329faf6580fSSatish Balay   }
1330faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1331faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1332faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13332593348eSBarry Smith   return 0;
13342593348eSBarry Smith }
13352593348eSBarry Smith 
1336de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
13372593348eSBarry Smith {
1338b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13399df24120SSatish Balay   if (nz)  *nz  = a->bs2*a->nz;
1340bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
1341bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
13422593348eSBarry Smith   return 0;
13432593348eSBarry Smith }
13442593348eSBarry Smith 
134591d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
134691d316f6SSatish Balay {
134791d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
134891d316f6SSatish Balay 
134991d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
135091d316f6SSatish Balay 
135191d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
135291d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1353a7c10996SSatish Balay       (a->nz != b->nz)) {
135491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
135591d316f6SSatish Balay   }
135691d316f6SSatish Balay 
135791d316f6SSatish Balay   /* if the a->i are the same */
135891d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
135991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
136091d316f6SSatish Balay   }
136191d316f6SSatish Balay 
136291d316f6SSatish Balay   /* if a->j are the same */
136391d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
136491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
136591d316f6SSatish Balay   }
136691d316f6SSatish Balay 
136791d316f6SSatish Balay   /* if a->a are the same */
136891d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
136991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137091d316f6SSatish Balay   }
137191d316f6SSatish Balay   *flg = PETSC_TRUE;
137291d316f6SSatish Balay   return 0;
137391d316f6SSatish Balay 
137491d316f6SSatish Balay }
137591d316f6SSatish Balay 
137691d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
137791d316f6SSatish Balay {
137891d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13797e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
138017e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
138117e48fc4SSatish Balay 
138217e48fc4SSatish Balay   bs   = a->bs;
138317e48fc4SSatish Balay   aa   = a->a;
138417e48fc4SSatish Balay   ai   = a->i;
138517e48fc4SSatish Balay   aj   = a->j;
138617e48fc4SSatish Balay   ambs = a->mbs;
13879df24120SSatish Balay   bs2  = a->bs2;
138891d316f6SSatish Balay 
138991d316f6SSatish Balay   VecSet(&zero,v);
139091d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
13919867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
139217e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
139317e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
139417e48fc4SSatish Balay       if (aj[j] == i) {
139517e48fc4SSatish Balay         row  = i*bs;
13967e67e3f9SSatish Balay         aa_j = aa+j*bs2;
13977e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
139891d316f6SSatish Balay         break;
139991d316f6SSatish Balay       }
140091d316f6SSatish Balay     }
140191d316f6SSatish Balay   }
140291d316f6SSatish Balay   return 0;
140391d316f6SSatish Balay }
140491d316f6SSatish Balay 
14059867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14069867e207SSatish Balay {
14079867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14089867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14097e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14109867e207SSatish Balay 
14119867e207SSatish Balay   ai  = a->i;
14129867e207SSatish Balay   aj  = a->j;
14139867e207SSatish Balay   aa  = a->a;
14149867e207SSatish Balay   m   = a->m;
14159867e207SSatish Balay   n   = a->n;
14169867e207SSatish Balay   bs  = a->bs;
14179867e207SSatish Balay   mbs = a->mbs;
14189df24120SSatish Balay   bs2 = a->bs2;
14199867e207SSatish Balay   if (ll) {
14209867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
14219867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14229867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14239867e207SSatish Balay       M  = ai[i+1] - ai[i];
14249867e207SSatish Balay       li = l + i*bs;
14257e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14269867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14277e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14289867e207SSatish Balay           (*v++) *= li[k%bs];
14299867e207SSatish Balay         }
14309867e207SSatish Balay       }
14319867e207SSatish Balay     }
14329867e207SSatish Balay   }
14339867e207SSatish Balay 
14349867e207SSatish Balay   if (rr) {
14359867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
14369867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14379867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14389867e207SSatish Balay       M  = ai[i+1] - ai[i];
14397e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14409867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14419867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14429867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14439867e207SSatish Balay           x = ri[k];
14449867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14459867e207SSatish Balay         }
14469867e207SSatish Balay       }
14479867e207SSatish Balay     }
14489867e207SSatish Balay   }
14499867e207SSatish Balay   return 0;
14509867e207SSatish Balay }
14519867e207SSatish Balay 
14529867e207SSatish Balay 
1453b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1454b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14552a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1456736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1457736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14581a6a6d98SBarry Smith 
14591a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14601a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14611a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14621a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14631a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14641a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14651a6a6d98SBarry Smith 
14667fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14677fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14687fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14697fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14707fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14717fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14722593348eSBarry Smith 
1473b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14742593348eSBarry Smith {
1475b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14762593348eSBarry Smith   Scalar      *v = a->a;
14772593348eSBarry Smith   double      sum = 0.0;
14789df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
14792593348eSBarry Smith 
14802593348eSBarry Smith   if (type == NORM_FROBENIUS) {
14810419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
14822593348eSBarry Smith #if defined(PETSC_COMPLEX)
14832593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
14842593348eSBarry Smith #else
14852593348eSBarry Smith       sum += (*v)*(*v); v++;
14862593348eSBarry Smith #endif
14872593348eSBarry Smith     }
14882593348eSBarry Smith     *norm = sqrt(sum);
14892593348eSBarry Smith   }
14902593348eSBarry Smith   else {
1491b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
14922593348eSBarry Smith   }
14932593348eSBarry Smith   return 0;
14942593348eSBarry Smith }
14952593348eSBarry Smith 
14962593348eSBarry Smith /*
14972593348eSBarry Smith      note: This can only work for identity for row and col. It would
14982593348eSBarry Smith    be good to check this and otherwise generate an error.
14992593348eSBarry Smith */
1500b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15012593348eSBarry Smith {
1502b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15032593348eSBarry Smith   Mat         outA;
1504de6a44a3SBarry Smith   int         ierr;
15052593348eSBarry Smith 
1506b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15072593348eSBarry Smith 
15082593348eSBarry Smith   outA          = inA;
15092593348eSBarry Smith   inA->factor   = FACTOR_LU;
15102593348eSBarry Smith   a->row        = row;
15112593348eSBarry Smith   a->col        = col;
15122593348eSBarry Smith 
1513de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15142593348eSBarry Smith 
15152593348eSBarry Smith   if (!a->diag) {
1516b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15172593348eSBarry Smith   }
15187fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15192593348eSBarry Smith   return 0;
15202593348eSBarry Smith }
15212593348eSBarry Smith 
1522b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15232593348eSBarry Smith {
1524b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15259df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1526b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1527b6490206SBarry Smith   PLogFlops(totalnz);
15282593348eSBarry Smith   return 0;
15292593348eSBarry Smith }
15302593348eSBarry Smith 
15317e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15327e67e3f9SSatish Balay {
15337e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15347e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1535a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15369df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15377e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15387e67e3f9SSatish Balay 
15397e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15407e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15417e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15427e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1543a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15447e67e3f9SSatish Balay     nrow = ailen[brow];
15457e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15467e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15477e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1548a7c10996SSatish Balay       col  = in[l] ;
15497e67e3f9SSatish Balay       bcol = col/bs;
15507e67e3f9SSatish Balay       cidx = col%bs;
15517e67e3f9SSatish Balay       ridx = row%bs;
15527e67e3f9SSatish Balay       high = nrow;
15537e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15547e67e3f9SSatish Balay       while (high-low > 5) {
15557e67e3f9SSatish Balay         t = (low+high)/2;
15567e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15577e67e3f9SSatish Balay         else             low  = t;
15587e67e3f9SSatish Balay       }
15597e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15607e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15617e67e3f9SSatish Balay         if (rp[i] == bcol) {
15627e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15637e67e3f9SSatish Balay           goto finished;
15647e67e3f9SSatish Balay         }
15657e67e3f9SSatish Balay       }
15667e67e3f9SSatish Balay       *v++ = zero;
15677e67e3f9SSatish Balay       finished:;
15687e67e3f9SSatish Balay     }
15697e67e3f9SSatish Balay   }
15707e67e3f9SSatish Balay   return 0;
15717e67e3f9SSatish Balay }
15727e67e3f9SSatish Balay 
15735a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15745a838052SSatish Balay {
15755a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
15765a838052SSatish Balay   *bs = baij->bs;
15775a838052SSatish Balay   return 0;
15785a838052SSatish Balay }
15795a838052SSatish Balay 
1580d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1581d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1582d9b7c43dSSatish Balay {
1583d9b7c43dSSatish Balay   int i,row;
1584d9b7c43dSSatish Balay   row = idx[0];
1585d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1586d9b7c43dSSatish Balay 
1587d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1588d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1589d9b7c43dSSatish Balay   }
1590d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1591d9b7c43dSSatish Balay   return 0;
1592d9b7c43dSSatish Balay }
1593d9b7c43dSSatish Balay 
1594d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1595d9b7c43dSSatish Balay {
1596d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1597d9b7c43dSSatish Balay   IS          is_local;
1598d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1599d9b7c43dSSatish Balay   PetscTruth  flg;
1600d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1601d9b7c43dSSatish Balay 
1602d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1603d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1604d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1605d9b7c43dSSatish Balay   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1606d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1607d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1608d9b7c43dSSatish Balay 
1609d9b7c43dSSatish Balay   i = 0;
1610d9b7c43dSSatish Balay   while (i < is_n) {
1611d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1612d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1613d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1614d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1615d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1616d9b7c43dSSatish Balay       i += bs;
1617d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1618d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1619d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1620d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1621d9b7c43dSSatish Balay         aa[0] = zero;
1622d9b7c43dSSatish Balay         aa+=bs;
1623d9b7c43dSSatish Balay       }
1624d9b7c43dSSatish Balay       i++;
1625d9b7c43dSSatish Balay     }
1626d9b7c43dSSatish Balay   }
1627d9b7c43dSSatish Balay   if (diag) {
1628d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1629d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1630d9b7c43dSSatish Balay     }
1631d9b7c43dSSatish Balay   }
1632d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1633d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1634d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1635d9b7c43dSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1636d9b7c43dSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637d9b7c43dSSatish Balay 
1638d9b7c43dSSatish Balay   return 0;
1639d9b7c43dSSatish Balay }
1640ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1641ba4ca20aSSatish Balay {
1642ba4ca20aSSatish Balay   static int called = 0;
1643ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1644ba4ca20aSSatish Balay 
1645ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1646ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1647ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1648ba4ca20aSSatish Balay   return 0;
1649ba4ca20aSSatish Balay }
1650d9b7c43dSSatish Balay 
16512593348eSBarry Smith /* -------------------------------------------------------------------*/
1652cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16539867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1654f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1655faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16561a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1657b6490206SBarry Smith        0,0,
1658de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1659b6490206SBarry Smith        0,
1660f2501298SSatish Balay        MatTranspose_SeqBAIJ,
166117e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16629867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1663584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1664b6490206SBarry Smith        0,
1665d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1666b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
16677fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1668b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1669de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1670b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1671b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1672b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1673af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16747e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1675ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16765a838052SSatish Balay        0,0,0,MatGetBlockSize_SeqBAIJ};
16772593348eSBarry Smith 
16782593348eSBarry Smith /*@C
167944cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
168044cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
168144cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
16822bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
16832bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
16842593348eSBarry Smith 
16852593348eSBarry Smith    Input Parameters:
16862593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1687b6490206SBarry Smith .  bs - size of block
16882593348eSBarry Smith .  m - number of rows
16892593348eSBarry Smith .  n - number of columns
1690b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
16912bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
16922bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
16932593348eSBarry Smith 
16942593348eSBarry Smith    Output Parameter:
16952593348eSBarry Smith .  A - the matrix
16962593348eSBarry Smith 
16972593348eSBarry Smith    Notes:
169844cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
16992593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
170044cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17012593348eSBarry Smith 
17022593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17032593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17042593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17052593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
17062593348eSBarry Smith 
170744cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17082593348eSBarry Smith @*/
1709b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17102593348eSBarry Smith {
17112593348eSBarry Smith   Mat         B;
1712b6490206SBarry Smith   Mat_SeqBAIJ *b;
1713f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1714b6490206SBarry Smith 
1715f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1716f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17172593348eSBarry Smith 
17182593348eSBarry Smith   *A = 0;
1719b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17202593348eSBarry Smith   PLogObjectCreate(B);
1721b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
172244cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17232593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17241a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17251a6a6d98SBarry Smith   if (!flg) {
17267fc0212eSBarry Smith     switch (bs) {
17277fc0212eSBarry Smith       case 1:
17287fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17291a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
173039b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1731f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17327fc0212eSBarry Smith        break;
17334eeb42bcSBarry Smith       case 2:
17344eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17351a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
173639b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1737f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17386d84be18SBarry Smith         break;
1739f327dd97SBarry Smith       case 3:
1740f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17411a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
174239b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1743f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17444eeb42bcSBarry Smith         break;
17456d84be18SBarry Smith       case 4:
17466d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17471a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
174839b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1749f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17506d84be18SBarry Smith         break;
17516d84be18SBarry Smith       case 5:
17526d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17531a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
175439b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1755f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17566d84be18SBarry Smith         break;
17577fc0212eSBarry Smith     }
17581a6a6d98SBarry Smith   }
1759b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1760b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17612593348eSBarry Smith   B->factor           = 0;
17622593348eSBarry Smith   B->lupivotthreshold = 1.0;
17632593348eSBarry Smith   b->row              = 0;
17642593348eSBarry Smith   b->col              = 0;
17652593348eSBarry Smith   b->reallocs         = 0;
17662593348eSBarry Smith 
176744cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
176844cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1769b6490206SBarry Smith   b->mbs     = mbs;
1770f2501298SSatish Balay   b->nbs     = nbs;
1771b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
17722593348eSBarry Smith   if (nnz == PETSC_NULL) {
1773119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
17742593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1775b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1776b6490206SBarry Smith     nz = nz*mbs;
17772593348eSBarry Smith   }
17782593348eSBarry Smith   else {
17792593348eSBarry Smith     nz = 0;
1780b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
17812593348eSBarry Smith   }
17822593348eSBarry Smith 
17832593348eSBarry Smith   /* allocate the matrix space */
17847e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
17852593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
17867e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
17877e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
17882593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
17892593348eSBarry Smith   b->i  = b->j + nz;
17902593348eSBarry Smith   b->singlemalloc = 1;
17912593348eSBarry Smith 
1792de6a44a3SBarry Smith   b->i[0] = 0;
1793b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
17942593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
17952593348eSBarry Smith   }
17962593348eSBarry Smith 
1797b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1798b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1799b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1800b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18012593348eSBarry Smith 
1802b6490206SBarry Smith   b->bs               = bs;
18039df24120SSatish Balay   b->bs2              = bs2;
1804b6490206SBarry Smith   b->mbs              = mbs;
18052593348eSBarry Smith   b->nz               = 0;
180618e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18072593348eSBarry Smith   b->sorted           = 0;
18082593348eSBarry Smith   b->roworiented      = 1;
18092593348eSBarry Smith   b->nonew            = 0;
18102593348eSBarry Smith   b->diag             = 0;
18112593348eSBarry Smith   b->solve_work       = 0;
1812de6a44a3SBarry Smith   b->mult_work        = 0;
18132593348eSBarry Smith   b->spptr            = 0;
18142593348eSBarry Smith   *A = B;
18152593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18162593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18172593348eSBarry Smith   return 0;
18182593348eSBarry Smith }
18192593348eSBarry Smith 
1820b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18212593348eSBarry Smith {
18222593348eSBarry Smith   Mat         C;
1823b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18249df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1825de6a44a3SBarry Smith 
1826de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18272593348eSBarry Smith 
18282593348eSBarry Smith   *B = 0;
1829b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18302593348eSBarry Smith   PLogObjectCreate(C);
1831b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18322593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1833b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1834b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18352593348eSBarry Smith   C->factor     = A->factor;
18362593348eSBarry Smith   c->row        = 0;
18372593348eSBarry Smith   c->col        = 0;
18382593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18392593348eSBarry Smith 
184044cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
184144cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
184244cd7ae7SLois Curfman McInnes   C->M          = a->m;
184344cd7ae7SLois Curfman McInnes   C->N          = a->n;
184444cd7ae7SLois Curfman McInnes 
1845b6490206SBarry Smith   c->bs         = a->bs;
18469df24120SSatish Balay   c->bs2        = a->bs2;
1847b6490206SBarry Smith   c->mbs        = a->mbs;
1848b6490206SBarry Smith   c->nbs        = a->nbs;
18492593348eSBarry Smith 
1850b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1851b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1852b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18532593348eSBarry Smith     c->imax[i] = a->imax[i];
18542593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18552593348eSBarry Smith   }
18562593348eSBarry Smith 
18572593348eSBarry Smith   /* allocate the matrix space */
18582593348eSBarry Smith   c->singlemalloc = 1;
18597e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18602593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18617e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1862de6a44a3SBarry Smith   c->i  = c->j + nz;
1863b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1864b6490206SBarry Smith   if (mbs > 0) {
1865de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18662593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18677e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18682593348eSBarry Smith     }
18692593348eSBarry Smith   }
18702593348eSBarry Smith 
1871b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
18722593348eSBarry Smith   c->sorted      = a->sorted;
18732593348eSBarry Smith   c->roworiented = a->roworiented;
18742593348eSBarry Smith   c->nonew       = a->nonew;
18752593348eSBarry Smith 
18762593348eSBarry Smith   if (a->diag) {
1877b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1878b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1879b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
18802593348eSBarry Smith       c->diag[i] = a->diag[i];
18812593348eSBarry Smith     }
18822593348eSBarry Smith   }
18832593348eSBarry Smith   else c->diag          = 0;
18842593348eSBarry Smith   c->nz                 = a->nz;
18852593348eSBarry Smith   c->maxnz              = a->maxnz;
18862593348eSBarry Smith   c->solve_work         = 0;
18872593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
18887fc0212eSBarry Smith   c->mult_work          = 0;
18892593348eSBarry Smith   *B = C;
18902593348eSBarry Smith   return 0;
18912593348eSBarry Smith }
18922593348eSBarry Smith 
189319bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
18942593348eSBarry Smith {
1895b6490206SBarry Smith   Mat_SeqBAIJ  *a;
18962593348eSBarry Smith   Mat          B;
1897de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1898b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
189935aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1900a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1901b6490206SBarry Smith   Scalar       *aa;
190219bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19032593348eSBarry Smith 
19041a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1905de6a44a3SBarry Smith   bs2  = bs*bs;
1906b6490206SBarry Smith 
19072593348eSBarry Smith   MPI_Comm_size(comm,&size);
1908b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
190990ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
191077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1911de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19122593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19132593348eSBarry Smith 
1914b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
191535aab85fSBarry Smith 
191635aab85fSBarry Smith   /*
191735aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
191835aab85fSBarry Smith     divisible by the blocksize
191935aab85fSBarry Smith   */
1920b6490206SBarry Smith   mbs        = M/bs;
192135aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
192235aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
192335aab85fSBarry Smith   else                  mbs++;
192435aab85fSBarry Smith   if (extra_rows) {
19251a6a6d98SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
192635aab85fSBarry Smith   }
1927b6490206SBarry Smith 
19282593348eSBarry Smith   /* read in row lengths */
192935aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
193077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
193135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19322593348eSBarry Smith 
1933b6490206SBarry Smith   /* read in column indices */
193435aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
193577c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
193635aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1937b6490206SBarry Smith 
1938b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1939b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1940b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
194135aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
194235aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
194335aab85fSBarry Smith   masked = mask + mbs;
1944b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1945b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
194635aab85fSBarry Smith     nmask = 0;
1947b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1948b6490206SBarry Smith       kmax = rowlengths[rowcount];
1949b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
195035aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
195135aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1952b6490206SBarry Smith       }
1953b6490206SBarry Smith       rowcount++;
1954b6490206SBarry Smith     }
195535aab85fSBarry Smith     browlengths[i] += nmask;
195635aab85fSBarry Smith     /* zero out the mask elements we set */
195735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1958b6490206SBarry Smith   }
1959b6490206SBarry Smith 
19602593348eSBarry Smith   /* create our matrix */
196135aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
196235aab85fSBarry Smith          CHKERRQ(ierr);
19632593348eSBarry Smith   B = *A;
1964b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19652593348eSBarry Smith 
19662593348eSBarry Smith   /* set matrix "i" values */
1967de6a44a3SBarry Smith   a->i[0] = 0;
1968b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1969b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1970b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
19712593348eSBarry Smith   }
1972b6490206SBarry Smith   a->nz         = 0;
1973b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
19742593348eSBarry Smith 
1975b6490206SBarry Smith   /* read in nonzero values */
197635aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
197777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
197835aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1979b6490206SBarry Smith 
1980b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1981b6490206SBarry Smith   nzcount = 0; jcount = 0;
1982b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1983b6490206SBarry Smith     nzcountb = nzcount;
198435aab85fSBarry Smith     nmask    = 0;
1985b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1986b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1987b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
198835aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
198935aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1990b6490206SBarry Smith       }
1991b6490206SBarry Smith       rowcount++;
1992b6490206SBarry Smith     }
1993de6a44a3SBarry Smith     /* sort the masked values */
199477c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1995de6a44a3SBarry Smith 
1996b6490206SBarry Smith     /* set "j" values into matrix */
1997b6490206SBarry Smith     maskcount = 1;
199835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
199935aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2000de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2001b6490206SBarry Smith     }
2002b6490206SBarry Smith     /* set "a" values into matrix */
2003de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2004b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2005b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2006b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2007de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2008de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2009de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2010de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2011b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2012b6490206SBarry Smith       }
2013b6490206SBarry Smith     }
201435aab85fSBarry Smith     /* zero out the mask elements we set */
201535aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2016b6490206SBarry Smith   }
201735aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2018b6490206SBarry Smith 
2019b6490206SBarry Smith   PetscFree(rowlengths);
2020b6490206SBarry Smith   PetscFree(browlengths);
2021b6490206SBarry Smith   PetscFree(aa);
2022b6490206SBarry Smith   PetscFree(jj);
2023b6490206SBarry Smith   PetscFree(mask);
2024b6490206SBarry Smith 
2025b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2026de6a44a3SBarry Smith 
2027de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2028de6a44a3SBarry Smith   if (flg) {
202919bcc07fSBarry Smith     Viewer tviewer;
203019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
203190ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
203219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
203319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2034de6a44a3SBarry Smith   }
2035de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2036de6a44a3SBarry Smith   if (flg) {
203719bcc07fSBarry Smith     Viewer tviewer;
203819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
203990ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
204019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
204119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2042de6a44a3SBarry Smith   }
2043de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2044de6a44a3SBarry Smith   if (flg) {
204519bcc07fSBarry Smith     Viewer tviewer;
204619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
204719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
204819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2049de6a44a3SBarry Smith   }
2050de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2051de6a44a3SBarry Smith   if (flg) {
205219bcc07fSBarry Smith     Viewer tviewer;
205319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
205490ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
205519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
205619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2057de6a44a3SBarry Smith   }
2058de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2059de6a44a3SBarry Smith   if (flg) {
206019bcc07fSBarry Smith     Viewer tviewer;
206119bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
206219bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
206319bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
206419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065de6a44a3SBarry Smith   }
20662593348eSBarry Smith   return 0;
20672593348eSBarry Smith }
20682593348eSBarry Smith 
20692593348eSBarry Smith 
20702593348eSBarry Smith 
2071