xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c16cb8f2de640b3af1673495cb6d811eff24116a)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*c16cb8f2SBarry Smith static char vcid[] = "$Id: baij.c,v 1.62 1996/08/01 22:21:46 balay Exp bsmith $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
111a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
121a6a6d98SBarry Smith #include "src/inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
143b547af2SSatish Balay 
15bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17a2ce50c7SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41a7c10996SSatish Balay   ishift = 0;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
441a6a6d98SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
451a6a6d98SBarry Smith     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
462593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
47bcd2baecSBarry Smith   } else {
48bcd2baecSBarry Smith     if (ishift == oshift) {
491a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50bcd2baecSBarry Smith     }
51bcd2baecSBarry Smith     else if (ishift == -1) {
52bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
531a6a6d98SBarry Smith       int nz = a->i[n] - 1;
54bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
551a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
561a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
581a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
59bcd2baecSBarry Smith     } else {
60bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
611a6a6d98SBarry Smith       int nz = a->i[n] - 1;
62bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
631a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
641a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
661a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
67bcd2baecSBarry Smith     }
68bcd2baecSBarry Smith   }
692593348eSBarry Smith   return 0;
702593348eSBarry Smith }
712593348eSBarry Smith 
72de6a44a3SBarry Smith /*
73de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
74de6a44a3SBarry Smith */
75de6a44a3SBarry Smith 
76de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
77de6a44a3SBarry Smith {
78de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
797fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
80de6a44a3SBarry Smith 
81de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
837fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
84de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85de6a44a3SBarry Smith       if (a->j[j] == i) {
86de6a44a3SBarry Smith         diag[i] = j;
87de6a44a3SBarry Smith         break;
88de6a44a3SBarry Smith       }
89de6a44a3SBarry Smith     }
90de6a44a3SBarry Smith   }
91de6a44a3SBarry Smith   a->diag = diag;
92de6a44a3SBarry Smith   return 0;
93de6a44a3SBarry Smith }
942593348eSBarry Smith 
952593348eSBarry Smith #include "draw.h"
962593348eSBarry Smith #include "pinclude/pviewer.h"
9777c4ece6SBarry Smith #include "sys.h"
982593348eSBarry Smith 
99b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1002593348eSBarry Smith {
101b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1029df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103b6490206SBarry Smith   Scalar      *aa;
1042593348eSBarry Smith 
10590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1062593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1072593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1082593348eSBarry Smith   col_lens[1] = a->m;
1092593348eSBarry Smith   col_lens[2] = a->n;
1107e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1112593348eSBarry Smith 
1122593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
113b6490206SBarry Smith   count = 0;
114b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
115b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
116b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117b6490206SBarry Smith     }
1182593348eSBarry Smith   }
11977c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1202593348eSBarry Smith   PetscFree(col_lens);
1212593348eSBarry Smith 
1222593348eSBarry Smith   /* store column indices (zero start index) */
1237e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124b6490206SBarry Smith   count = 0;
125b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
126b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
127b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
129b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1302593348eSBarry Smith         }
1312593348eSBarry Smith       }
132b6490206SBarry Smith     }
133b6490206SBarry Smith   }
1347e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135b6490206SBarry Smith   PetscFree(jj);
1362593348eSBarry Smith 
1372593348eSBarry Smith   /* store nonzero values */
1387e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139b6490206SBarry Smith   count = 0;
140b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
141b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
142b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1447e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
145b6490206SBarry Smith         }
146b6490206SBarry Smith       }
147b6490206SBarry Smith     }
148b6490206SBarry Smith   }
1497e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150b6490206SBarry Smith   PetscFree(aa);
1512593348eSBarry Smith   return 0;
1522593348eSBarry Smith }
1532593348eSBarry Smith 
154b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1552593348eSBarry Smith {
156b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1579df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1582593348eSBarry Smith   FILE        *fd;
1592593348eSBarry Smith   char        *outputname;
1602593348eSBarry Smith 
16190ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1622593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16390ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1647ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1657ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1662593348eSBarry Smith   }
16790ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
168b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1692593348eSBarry Smith   }
17044cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17144cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17244cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17344cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17444cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17544cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17644cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17744cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17844cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17944cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18144cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18244cd7ae7SLois Curfman McInnes #else
18344cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18444cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18544cd7ae7SLois Curfman McInnes #endif
18644cd7ae7SLois Curfman McInnes           }
18744cd7ae7SLois Curfman McInnes         }
18844cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18944cd7ae7SLois Curfman McInnes       }
19044cd7ae7SLois Curfman McInnes     }
19144cd7ae7SLois Curfman McInnes   }
1922593348eSBarry Smith   else {
193b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
194b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
195b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
196b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19888685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1997e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20088685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2017e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20288685aaeSLois Curfman McInnes           }
20388685aaeSLois Curfman McInnes           else {
2047e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20588685aaeSLois Curfman McInnes           }
20688685aaeSLois Curfman McInnes #else
2077e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20888685aaeSLois Curfman McInnes #endif
2092593348eSBarry Smith           }
2102593348eSBarry Smith         }
2112593348eSBarry Smith         fprintf(fd,"\n");
2122593348eSBarry Smith       }
2132593348eSBarry Smith     }
214b6490206SBarry Smith   }
2152593348eSBarry Smith   fflush(fd);
2162593348eSBarry Smith   return 0;
2172593348eSBarry Smith }
2182593348eSBarry Smith 
2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
2203270192aSSatish Balay {
2213270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
2223270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
2233270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
2243270192aSSatish Balay   Scalar       *aa;
2253270192aSSatish Balay   Draw         draw;
2263270192aSSatish Balay   DrawButton   button;
2273270192aSSatish Balay   PetscTruth   isnull;
2283270192aSSatish Balay 
2293270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
2303270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
2313270192aSSatish Balay 
2323270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
2333270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
2343270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2353270192aSSatish Balay   /* loop over matrix elements drawing boxes */
2363270192aSSatish Balay   color = DRAW_BLUE;
2373270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2383270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2393270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2403270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2413270192aSSatish Balay       aa = a->a + j*bs2;
2423270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2433270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2443270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
2453270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2463270192aSSatish Balay         }
2473270192aSSatish Balay       }
2483270192aSSatish Balay     }
2493270192aSSatish Balay   }
2503270192aSSatish Balay   color = DRAW_CYAN;
2513270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2523270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2533270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2543270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2553270192aSSatish Balay       aa = a->a + j*bs2;
2563270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2573270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2583270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
2593270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2603270192aSSatish Balay         }
2613270192aSSatish Balay       }
2623270192aSSatish Balay     }
2633270192aSSatish Balay   }
2643270192aSSatish Balay 
2653270192aSSatish Balay   color = DRAW_RED;
2663270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2673270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2683270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
2693270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
2703270192aSSatish Balay       aa = a->a + j*bs2;
2713270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
2723270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
2733270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
2743270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
2753270192aSSatish Balay         }
2763270192aSSatish Balay       }
2773270192aSSatish Balay     }
2783270192aSSatish Balay   }
2793270192aSSatish Balay 
2803270192aSSatish Balay   DrawFlush(draw);
2813270192aSSatish Balay   DrawGetPause(draw,&pause);
2823270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
2833270192aSSatish Balay 
2843270192aSSatish Balay   /* allow the matrix to zoom or shrink */
2853270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
2863270192aSSatish Balay   while (button != BUTTON_RIGHT) {
2873270192aSSatish Balay     DrawClear(draw);
2883270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
2893270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
2903270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
2913270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
2923270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
2933270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
2943270192aSSatish Balay     w *= scale; h *= scale;
2953270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
2963270192aSSatish Balay     color = DRAW_BLUE;
2973270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
2983270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
2993270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3013270192aSSatish Balay         aa = a->a + j*bs2;
3023270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3033270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3043270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
3053270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3063270192aSSatish Balay           }
3073270192aSSatish Balay         }
3083270192aSSatish Balay       }
3093270192aSSatish Balay     }
3103270192aSSatish Balay     color = DRAW_CYAN;
3113270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3123270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3133270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3143270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3153270192aSSatish Balay         aa = a->a + j*bs2;
3163270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3173270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3183270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
3193270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3203270192aSSatish Balay           }
3213270192aSSatish Balay         }
3223270192aSSatish Balay       }
3233270192aSSatish Balay     }
3243270192aSSatish Balay 
3253270192aSSatish Balay     color = DRAW_RED;
3263270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
3273270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
3283270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
3293270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
3303270192aSSatish Balay         aa = a->a + j*bs2;
3313270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
3323270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
3333270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
3343270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
3353270192aSSatish Balay           }
3363270192aSSatish Balay         }
3373270192aSSatish Balay       }
3383270192aSSatish Balay     }
3393270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
3403270192aSSatish Balay   }
3413270192aSSatish Balay   return 0;
3423270192aSSatish Balay }
3433270192aSSatish Balay 
344b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3452593348eSBarry Smith {
3462593348eSBarry Smith   Mat         A = (Mat) obj;
34719bcc07fSBarry Smith   ViewerType  vtype;
34819bcc07fSBarry Smith   int         ierr;
3492593348eSBarry Smith 
35019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35119bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
352b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3532593348eSBarry Smith   }
35419bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
355b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3562593348eSBarry Smith   }
35719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
358b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3592593348eSBarry Smith   }
36019bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
3613270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3622593348eSBarry Smith   }
3632593348eSBarry Smith   return 0;
3642593348eSBarry Smith }
365b6490206SBarry Smith 
366119a934aSSatish Balay #define CHUNKSIZE  10
367cd0e1443SSatish Balay 
368cd0e1443SSatish Balay /* This version has row oriented v  */
369cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
370cd0e1443SSatish Balay {
371cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
373cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
374a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3759df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
376cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
377cd0e1443SSatish Balay 
378cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
379cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
380cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
381cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
382a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
383cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
384cd0e1443SSatish Balay     low = 0;
385cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
386cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
387cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
388a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
389cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
390cd0e1443SSatish Balay       if (roworiented) {
391cd0e1443SSatish Balay         value = *v++;
392cd0e1443SSatish Balay       }
393cd0e1443SSatish Balay       else {
394cd0e1443SSatish Balay         value = v[k + l*m];
395cd0e1443SSatish Balay       }
396cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
397cd0e1443SSatish Balay       while (high-low > 5) {
398cd0e1443SSatish Balay         t = (low+high)/2;
399cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
400cd0e1443SSatish Balay         else             low  = t;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
403cd0e1443SSatish Balay         if (rp[i] > bcol) break;
404cd0e1443SSatish Balay         if (rp[i] == bcol) {
4057e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
406cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
407cd0e1443SSatish Balay           else                  *bap  = value;
408cd0e1443SSatish Balay           goto noinsert;
409cd0e1443SSatish Balay         }
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       if (nonew) goto noinsert;
412cd0e1443SSatish Balay       if (nrow >= rmax) {
413cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
414cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
415cd0e1443SSatish Balay         Scalar *new_a;
416cd0e1443SSatish Balay 
417cd0e1443SSatish Balay         /* malloc new storage space */
4187e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
419cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4207e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
421cd0e1443SSatish Balay         new_i   = new_j + new_nz;
422cd0e1443SSatish Balay 
423cd0e1443SSatish Balay         /* copy over old data into new slots */
424cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4257e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
426a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
427a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
428a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
429cd0e1443SSatish Balay                                                            len*sizeof(int));
430a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
431a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
432a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
433a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
434cd0e1443SSatish Balay         /* free up old matrix storage */
435cd0e1443SSatish Balay         PetscFree(a->a);
436cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
437cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
438cd0e1443SSatish Balay         a->singlemalloc = 1;
439cd0e1443SSatish Balay 
440a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
441cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4427e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
44318e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
444cd0e1443SSatish Balay         a->reallocs++;
445119a934aSSatish Balay         a->nz++;
446cd0e1443SSatish Balay       }
4477e67e3f9SSatish Balay       N = nrow++ - 1;
448cd0e1443SSatish Balay       /* shift up all the later entries in this row */
449cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
450cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4517e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
452cd0e1443SSatish Balay       }
4537e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
454cd0e1443SSatish Balay       rp[i] = bcol;
4557e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
456cd0e1443SSatish Balay       noinsert:;
457cd0e1443SSatish Balay       low = i;
458cd0e1443SSatish Balay     }
459cd0e1443SSatish Balay     ailen[brow] = nrow;
460cd0e1443SSatish Balay   }
461cd0e1443SSatish Balay   return 0;
462cd0e1443SSatish Balay }
463cd0e1443SSatish Balay 
4640b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4650b824a48SSatish Balay {
4660b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4670b824a48SSatish Balay   *m = a->m; *n = a->n;
4680b824a48SSatish Balay   return 0;
4690b824a48SSatish Balay }
4700b824a48SSatish Balay 
4710b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4720b824a48SSatish Balay {
4730b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4740b824a48SSatish Balay   *m = 0; *n = a->m;
4750b824a48SSatish Balay   return 0;
4760b824a48SSatish Balay }
4770b824a48SSatish Balay 
4789867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4799867e207SSatish Balay {
4809867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4817e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4829867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4839867e207SSatish Balay 
4849867e207SSatish Balay   bs  = a->bs;
4859867e207SSatish Balay   ai  = a->i;
4869867e207SSatish Balay   aj  = a->j;
4879867e207SSatish Balay   aa  = a->a;
4889df24120SSatish Balay   bs2 = a->bs2;
4899867e207SSatish Balay 
4909867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4919867e207SSatish Balay 
4929867e207SSatish Balay   bn  = row/bs;   /* Block number */
4939867e207SSatish Balay   bp  = row % bs; /* Block Position */
4949867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4959867e207SSatish Balay   *nz = bs*M;
4969867e207SSatish Balay 
4979867e207SSatish Balay   if (v) {
4989867e207SSatish Balay     *v = 0;
4999867e207SSatish Balay     if (*nz) {
5009867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
5019867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5029867e207SSatish Balay         v_i  = *v + i*bs;
5037e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
5047e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
5059867e207SSatish Balay       }
5069867e207SSatish Balay     }
5079867e207SSatish Balay   }
5089867e207SSatish Balay 
5099867e207SSatish Balay   if (idx) {
5109867e207SSatish Balay     *idx = 0;
5119867e207SSatish Balay     if (*nz) {
5129867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5139867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5149867e207SSatish Balay         idx_i = *idx + i*bs;
5159867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5169867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5179867e207SSatish Balay       }
5189867e207SSatish Balay     }
5199867e207SSatish Balay   }
5209867e207SSatish Balay   return 0;
5219867e207SSatish Balay }
5229867e207SSatish Balay 
5239867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5249867e207SSatish Balay {
5259867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5269867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5279867e207SSatish Balay   return 0;
5289867e207SSatish Balay }
529b6490206SBarry Smith 
530f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
531f2501298SSatish Balay {
532f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
533f2501298SSatish Balay   Mat         C;
534f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5359df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
536f2501298SSatish Balay   Scalar      *array=a->a;
537f2501298SSatish Balay 
538f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
539f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
540f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
541f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
542a7c10996SSatish Balay 
543a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
544f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
545f2501298SSatish Balay   PetscFree(col);
546f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
547f2501298SSatish Balay   cols = rows + bs;
548f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
549f2501298SSatish Balay     cols[0] = i*bs;
550f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
551f2501298SSatish Balay     len = ai[i+1] - ai[i];
552f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
553f2501298SSatish Balay       rows[0] = (*aj++)*bs;
554f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
555f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
556f2501298SSatish Balay       array += bs2;
557f2501298SSatish Balay     }
558f2501298SSatish Balay   }
5591073c447SSatish Balay   PetscFree(rows);
560f2501298SSatish Balay 
5616d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5626d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
563f2501298SSatish Balay 
564f2501298SSatish Balay   if (B != PETSC_NULL) {
565f2501298SSatish Balay     *B = C;
566f2501298SSatish Balay   } else {
567f2501298SSatish Balay     /* This isn't really an in-place transpose */
568f2501298SSatish Balay     PetscFree(a->a);
569f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
570f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
571f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
572f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
573f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
574f2501298SSatish Balay     PetscFree(a);
575f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
576f2501298SSatish Balay     PetscHeaderDestroy(C);
577f2501298SSatish Balay   }
578f2501298SSatish Balay   return 0;
579f2501298SSatish Balay }
580f2501298SSatish Balay 
581f2501298SSatish Balay 
582584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
583584200bdSSatish Balay {
584584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
585584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
586a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5879df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
588584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
589584200bdSSatish Balay 
5906d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
591584200bdSSatish Balay 
592584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
593584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
594584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
595584200bdSSatish Balay     if (fshift) {
596a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
597584200bdSSatish Balay       N = ailen[i];
598584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
599584200bdSSatish Balay         ip[j-fshift] = ip[j];
6007e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
601584200bdSSatish Balay       }
602584200bdSSatish Balay     }
603584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
604584200bdSSatish Balay   }
605584200bdSSatish Balay   if (mbs) {
606584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
607584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
608584200bdSSatish Balay   }
609584200bdSSatish Balay   /* reset ilen and imax for each row */
610584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
611584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
612584200bdSSatish Balay   }
613a7c10996SSatish Balay   a->nz = ai[mbs];
614584200bdSSatish Balay 
615584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
616584200bdSSatish Balay   if (fshift && a->diag) {
617584200bdSSatish Balay     PetscFree(a->diag);
618584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
619584200bdSSatish Balay     a->diag = 0;
620584200bdSSatish Balay   }
62167790700SSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
622584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
623584200bdSSatish Balay            a->reallocs);
624584200bdSSatish Balay   return 0;
625584200bdSSatish Balay }
626584200bdSSatish Balay 
627b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6282593348eSBarry Smith {
629b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6309df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6312593348eSBarry Smith   return 0;
6322593348eSBarry Smith }
6332593348eSBarry Smith 
634b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6352593348eSBarry Smith {
6362593348eSBarry Smith   Mat         A  = (Mat) obj;
637b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6382593348eSBarry Smith 
6392593348eSBarry Smith #if defined(PETSC_LOG)
6402593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6412593348eSBarry Smith #endif
6422593348eSBarry Smith   PetscFree(a->a);
6432593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6442593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6452593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6462593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6472593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
648de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6492593348eSBarry Smith   PetscFree(a);
650f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
651f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6522593348eSBarry Smith   return 0;
6532593348eSBarry Smith }
6542593348eSBarry Smith 
655b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6562593348eSBarry Smith {
657b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6586d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6596d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6606d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
6616d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6626d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6636d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
6646d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6656d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
6666d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
66794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6686d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6696d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6702593348eSBarry Smith   else
671b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6722593348eSBarry Smith   return 0;
6732593348eSBarry Smith }
6742593348eSBarry Smith 
6752593348eSBarry Smith 
6762593348eSBarry Smith /* -------------------------------------------------------*/
6772593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6782593348eSBarry Smith /* -------------------------------------------------------*/
679b6490206SBarry Smith #include "pinclude/plapack.h"
680b6490206SBarry Smith 
68139b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6822593348eSBarry Smith {
683b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
68439b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
685*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
6862593348eSBarry Smith 
687*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
688*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
689b6490206SBarry Smith 
690119a934aSSatish Balay   idx   = a->j;
691119a934aSSatish Balay   v     = a->a;
692119a934aSSatish Balay   ii    = a->i;
693119a934aSSatish Balay 
694119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
695119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
696119a934aSSatish Balay     sum  = 0.0;
697119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
6981a6a6d98SBarry Smith     z[i] = sum;
699119a934aSSatish Balay   }
700*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
701*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
70239b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
70339b95ed1SBarry Smith   return 0;
70439b95ed1SBarry Smith }
70539b95ed1SBarry Smith 
70639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
70739b95ed1SBarry Smith {
70839b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
70939b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
71039b95ed1SBarry Smith   register Scalar x1,x2;
71139b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
712*c16cb8f2SBarry Smith   int             j,n;
71339b95ed1SBarry Smith 
714*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
715*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
71639b95ed1SBarry Smith 
71739b95ed1SBarry Smith   idx   = a->j;
71839b95ed1SBarry Smith   v     = a->a;
71939b95ed1SBarry Smith   ii    = a->i;
72039b95ed1SBarry Smith 
721119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
722119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
723119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
724119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
725119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
727119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
728119a934aSSatish Balay       v += 4;
729119a934aSSatish Balay     }
7301a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
731119a934aSSatish Balay     z += 2;
732119a934aSSatish Balay   }
733*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
734*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
73539b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
73639b95ed1SBarry Smith   return 0;
73739b95ed1SBarry Smith }
73839b95ed1SBarry Smith 
73939b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
74039b95ed1SBarry Smith {
74139b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
74239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
74439b95ed1SBarry Smith 
745*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
746*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
74739b95ed1SBarry Smith 
74839b95ed1SBarry Smith   idx   = a->j;
74939b95ed1SBarry Smith   v     = a->a;
75039b95ed1SBarry Smith   ii    = a->i;
75139b95ed1SBarry Smith 
752119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
753119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
754119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
756119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760119a934aSSatish Balay       v += 9;
761119a934aSSatish Balay     }
7621a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763119a934aSSatish Balay     z += 3;
764119a934aSSatish Balay   }
765*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
766*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
76739b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
76839b95ed1SBarry Smith   return 0;
76939b95ed1SBarry Smith }
77039b95ed1SBarry Smith 
77139b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
77239b95ed1SBarry Smith {
77339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77439b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77539b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
77639b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
777*c16cb8f2SBarry Smith   int             j,n;
77839b95ed1SBarry Smith 
779*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
780*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
78139b95ed1SBarry Smith 
78239b95ed1SBarry Smith   idx   = a->j;
78339b95ed1SBarry Smith   v     = a->a;
78439b95ed1SBarry Smith   ii    = a->i;
78539b95ed1SBarry Smith 
786119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
787119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
788119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
790119a934aSSatish Balay       xb = x + 4*(*idx++);
791119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796119a934aSSatish Balay       v += 16;
797119a934aSSatish Balay     }
7981a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799119a934aSSatish Balay     z += 4;
800119a934aSSatish Balay   }
801*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
802*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
80339b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80439b95ed1SBarry Smith   return 0;
80539b95ed1SBarry Smith }
80639b95ed1SBarry Smith 
80739b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
80839b95ed1SBarry Smith {
80939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
81039b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
81139b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
812*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
81339b95ed1SBarry Smith 
814*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
815*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
81639b95ed1SBarry Smith 
81739b95ed1SBarry Smith   idx   = a->j;
81839b95ed1SBarry Smith   v     = a->a;
81939b95ed1SBarry Smith   ii    = a->i;
82039b95ed1SBarry Smith 
821119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
822119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
823119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
825119a934aSSatish Balay       xb = x + 5*(*idx++);
826119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832119a934aSSatish Balay       v += 25;
833119a934aSSatish Balay     }
8341a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835119a934aSSatish Balay     z += 5;
836119a934aSSatish Balay   }
837*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
838*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
83939b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
84039b95ed1SBarry Smith   return 0;
84139b95ed1SBarry Smith }
84239b95ed1SBarry Smith 
84339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84439b95ed1SBarry Smith {
84539b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
84639b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
847*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848*c16cb8f2SBarry Smith   int             _One = 1,ncols,k;
849*c16cb8f2SBarry Smith   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
85039b95ed1SBarry Smith 
851*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
852*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
85339b95ed1SBarry Smith 
85439b95ed1SBarry Smith   idx   = a->j;
85539b95ed1SBarry Smith   v     = a->a;
85639b95ed1SBarry Smith   ii    = a->i;
85739b95ed1SBarry Smith 
85839b95ed1SBarry Smith 
859119a934aSSatish Balay   if (!a->mult_work) {
8603b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8612b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862119a934aSSatish Balay   }
863119a934aSSatish Balay   work = a->mult_work;
864119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
865119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
866119a934aSSatish Balay     ncols = n*bs;
867119a934aSSatish Balay     workt = work;
868119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
869119a934aSSatish Balay       xb = x + bs*(*idx++);
870119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871119a934aSSatish Balay       workt += bs;
872119a934aSSatish Balay     }
8731a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874119a934aSSatish Balay     v += n*bs2;
875119a934aSSatish Balay     z += bs;
876119a934aSSatish Balay   }
877*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
878*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
8791a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
880bb42667fSSatish Balay   return 0;
881bb42667fSSatish Balay }
882bb42667fSSatish Balay 
883f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884f44d3a6dSSatish Balay {
885f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
887*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n;
888f44d3a6dSSatish Balay 
889*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
890*c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
891*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
892f44d3a6dSSatish Balay 
893f44d3a6dSSatish Balay   idx   = a->j;
894f44d3a6dSSatish Balay   v     = a->a;
895f44d3a6dSSatish Balay   ii    = a->i;
896f44d3a6dSSatish Balay 
897f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
898f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
899f44d3a6dSSatish Balay     sum  = y[i];
900f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
901f44d3a6dSSatish Balay     z[i] = sum;
902f44d3a6dSSatish Balay   }
903*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
904*c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
905*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
906f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
907f44d3a6dSSatish Balay   return 0;
908f44d3a6dSSatish Balay }
909f44d3a6dSSatish Balay 
910f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911f44d3a6dSSatish Balay {
912f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914f44d3a6dSSatish Balay   register Scalar x1,x2;
915f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
916*c16cb8f2SBarry Smith   int             j,n;
917f44d3a6dSSatish Balay 
918*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
919*c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
920*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
921f44d3a6dSSatish Balay 
922f44d3a6dSSatish Balay   idx   = a->j;
923f44d3a6dSSatish Balay   v     = a->a;
924f44d3a6dSSatish Balay   ii    = a->i;
925f44d3a6dSSatish Balay 
926f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
927f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
928f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
929f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
930f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
932f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
933f44d3a6dSSatish Balay       v += 4;
934f44d3a6dSSatish Balay     }
935f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
936f44d3a6dSSatish Balay     z += 2; y += 2;
937f44d3a6dSSatish Balay   }
938*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
939*c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
940*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
941f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
942f44d3a6dSSatish Balay   return 0;
943f44d3a6dSSatish Balay }
944f44d3a6dSSatish Balay 
945f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946f44d3a6dSSatish Balay {
947f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
950f44d3a6dSSatish Balay 
951*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
952*c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
953*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
954f44d3a6dSSatish Balay 
955f44d3a6dSSatish Balay   idx   = a->j;
956f44d3a6dSSatish Balay   v     = a->a;
957f44d3a6dSSatish Balay   ii    = a->i;
958f44d3a6dSSatish Balay 
959f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
960f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
961f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
963f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967f44d3a6dSSatish Balay       v += 9;
968f44d3a6dSSatish Balay     }
969f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970f44d3a6dSSatish Balay     z += 3; y += 3;
971f44d3a6dSSatish Balay   }
972*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
973*c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
974*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
975f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
976f44d3a6dSSatish Balay   return 0;
977f44d3a6dSSatish Balay }
978f44d3a6dSSatish Balay 
979f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980f44d3a6dSSatish Balay {
981f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
984f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
985*c16cb8f2SBarry Smith   int             j,n;
986f44d3a6dSSatish Balay 
987*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
988*c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
989*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
990f44d3a6dSSatish Balay 
991f44d3a6dSSatish Balay   idx   = a->j;
992f44d3a6dSSatish Balay   v     = a->a;
993f44d3a6dSSatish Balay   ii    = a->i;
994f44d3a6dSSatish Balay 
995f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
996f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
997f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
999f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1000f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005f44d3a6dSSatish Balay       v += 16;
1006f44d3a6dSSatish Balay     }
1007f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008f44d3a6dSSatish Balay     z += 4; y += 4;
1009f44d3a6dSSatish Balay   }
1010*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1011*c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1012*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1013f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1014f44d3a6dSSatish Balay   return 0;
1015f44d3a6dSSatish Balay }
1016f44d3a6dSSatish Balay 
1017f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018f44d3a6dSSatish Balay {
1019f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1022*c16cb8f2SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023f44d3a6dSSatish Balay 
1024*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1025*c16cb8f2SBarry Smith   VecGetArray_Fast(yy,y);
1026*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1027f44d3a6dSSatish Balay 
1028f44d3a6dSSatish Balay   idx   = a->j;
1029f44d3a6dSSatish Balay   v     = a->a;
1030f44d3a6dSSatish Balay   ii    = a->i;
1031f44d3a6dSSatish Balay 
1032f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1033f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1034f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1036f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1037f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043f44d3a6dSSatish Balay       v += 25;
1044f44d3a6dSSatish Balay     }
1045f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046f44d3a6dSSatish Balay     z += 5; y += 5;
1047f44d3a6dSSatish Balay   }
1048*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1049*c16cb8f2SBarry Smith   VecRestoreArray_Fast(yy,y);
1050*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1051f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1052f44d3a6dSSatish Balay   return 0;
1053f44d3a6dSSatish Balay }
1054f44d3a6dSSatish Balay 
1055f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056f44d3a6dSSatish Balay {
1057f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1059f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061f44d3a6dSSatish Balay 
1062f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063f44d3a6dSSatish Balay 
1064*c16cb8f2SBarry Smith   VecGetArray_Fast(xx,x);
1065*c16cb8f2SBarry Smith   VecGetArray_Fast(zz,z);
1066f44d3a6dSSatish Balay 
1067f44d3a6dSSatish Balay   idx   = a->j;
1068f44d3a6dSSatish Balay   v     = a->a;
1069f44d3a6dSSatish Balay   ii    = a->i;
1070f44d3a6dSSatish Balay 
1071f44d3a6dSSatish Balay 
1072f44d3a6dSSatish Balay   if (!a->mult_work) {
1073f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1074f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075f44d3a6dSSatish Balay   }
1076f44d3a6dSSatish Balay   work = a->mult_work;
1077f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1078f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1079f44d3a6dSSatish Balay     ncols = n*bs;
1080f44d3a6dSSatish Balay     workt = work;
1081f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1082f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1083f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084f44d3a6dSSatish Balay       workt += bs;
1085f44d3a6dSSatish Balay     }
1086f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087f44d3a6dSSatish Balay     v += n*bs2;
1088f44d3a6dSSatish Balay     z += bs;
1089f44d3a6dSSatish Balay   }
1090*c16cb8f2SBarry Smith   VecRestoreArray_Fast(xx,x);
1091*c16cb8f2SBarry Smith   VecRestoreArray_Fast(zz,z);
1092f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1093f44d3a6dSSatish Balay   return 0;
1094f44d3a6dSSatish Balay }
1095f44d3a6dSSatish Balay 
10961a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097bb42667fSSatish Balay {
1098bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
10991a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1100bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11029df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103119a934aSSatish Balay 
1104119a934aSSatish Balay 
1105bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1106bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1107bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1108bb42667fSSatish Balay 
1109119a934aSSatish Balay   idx   = a->j;
1110119a934aSSatish Balay   v     = a->a;
1111119a934aSSatish Balay   ii    = a->i;
1112119a934aSSatish Balay 
1113119a934aSSatish Balay   switch (bs) {
1114119a934aSSatish Balay   case 1:
1115119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1116119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1117119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1118119a934aSSatish Balay       ib = idx + ai[i];
1119119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1120bb1453f0SSatish Balay         rval    = ib[j];
1121bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1122119a934aSSatish Balay       }
1123119a934aSSatish Balay     }
1124119a934aSSatish Balay     break;
1125119a934aSSatish Balay   case 2:
1126119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1127119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1128119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129119a934aSSatish Balay       ib = idx + ai[i];
1130119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1131119a934aSSatish Balay         rval      = ib[j]*2;
1132bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1133bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1134119a934aSSatish Balay         v += 4;
1135119a934aSSatish Balay       }
1136119a934aSSatish Balay     }
1137119a934aSSatish Balay     break;
1138119a934aSSatish Balay   case 3:
1139119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1140119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1141119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142119a934aSSatish Balay       ib = idx + ai[i];
1143119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1144119a934aSSatish Balay         rval      = ib[j]*3;
1145bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148119a934aSSatish Balay         v += 9;
1149119a934aSSatish Balay       }
1150119a934aSSatish Balay     }
1151119a934aSSatish Balay     break;
1152119a934aSSatish Balay   case 4:
1153119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1154119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1155119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156119a934aSSatish Balay       ib = idx + ai[i];
1157119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1158119a934aSSatish Balay         rval      = ib[j]*4;
1159bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163119a934aSSatish Balay         v += 16;
1164119a934aSSatish Balay       }
1165119a934aSSatish Balay     }
1166119a934aSSatish Balay     break;
1167119a934aSSatish Balay   case 5:
1168119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1169119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1170119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1172119a934aSSatish Balay       ib = idx + ai[i];
1173119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1174119a934aSSatish Balay         rval      = ib[j]*5;
1175bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180119a934aSSatish Balay         v += 25;
1181119a934aSSatish Balay       }
1182119a934aSSatish Balay     }
1183119a934aSSatish Balay     break;
1184119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1185119a934aSSatish Balay     default: {
1186119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187119a934aSSatish Balay       if (!a->mult_work) {
11883b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1189bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1191119a934aSSatish Balay       }
1192119a934aSSatish Balay       work = a->mult_work;
1193119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1194119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1195119a934aSSatish Balay         ncols = n*bs;
1196119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1197119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198119a934aSSatish Balay         v += n*bs2;
1199119a934aSSatish Balay         x += bs;
1200119a934aSSatish Balay         workt = work;
1201119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1202119a934aSSatish Balay           zb = z + bs*(*idx++);
1203bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204119a934aSSatish Balay           workt += bs;
1205119a934aSSatish Balay         }
1206119a934aSSatish Balay       }
1207119a934aSSatish Balay     }
1208119a934aSSatish Balay   }
12090419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12100419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1212faf6580fSSatish Balay   return 0;
1213faf6580fSSatish Balay }
1214faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215faf6580fSSatish Balay {
1216faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1218faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221faf6580fSSatish Balay 
1222faf6580fSSatish Balay 
1223faf6580fSSatish Balay 
1224faf6580fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1225faf6580fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1226faf6580fSSatish Balay 
1227648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1229648c1d95SSatish Balay 
1230faf6580fSSatish Balay 
1231faf6580fSSatish Balay   idx   = a->j;
1232faf6580fSSatish Balay   v     = a->a;
1233faf6580fSSatish Balay   ii    = a->i;
1234faf6580fSSatish Balay 
1235faf6580fSSatish Balay   switch (bs) {
1236faf6580fSSatish Balay   case 1:
1237faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1238faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1239faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1240faf6580fSSatish Balay       ib = idx + ai[i];
1241faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1242faf6580fSSatish Balay         rval    = ib[j];
1243faf6580fSSatish Balay         z[rval] += *v++ * x1;
1244faf6580fSSatish Balay       }
1245faf6580fSSatish Balay     }
1246faf6580fSSatish Balay     break;
1247faf6580fSSatish Balay   case 2:
1248faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1249faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1250faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251faf6580fSSatish Balay       ib = idx + ai[i];
1252faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1253faf6580fSSatish Balay         rval      = ib[j]*2;
1254faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1255faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1256faf6580fSSatish Balay         v += 4;
1257faf6580fSSatish Balay       }
1258faf6580fSSatish Balay     }
1259faf6580fSSatish Balay     break;
1260faf6580fSSatish Balay   case 3:
1261faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1262faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1263faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264faf6580fSSatish Balay       ib = idx + ai[i];
1265faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1266faf6580fSSatish Balay         rval      = ib[j]*3;
1267faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270faf6580fSSatish Balay         v += 9;
1271faf6580fSSatish Balay       }
1272faf6580fSSatish Balay     }
1273faf6580fSSatish Balay     break;
1274faf6580fSSatish Balay   case 4:
1275faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1276faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1277faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278faf6580fSSatish Balay       ib = idx + ai[i];
1279faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1280faf6580fSSatish Balay         rval      = ib[j]*4;
1281faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285faf6580fSSatish Balay         v += 16;
1286faf6580fSSatish Balay       }
1287faf6580fSSatish Balay     }
1288faf6580fSSatish Balay     break;
1289faf6580fSSatish Balay   case 5:
1290faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1291faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1292faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1294faf6580fSSatish Balay       ib = idx + ai[i];
1295faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1296faf6580fSSatish Balay         rval      = ib[j]*5;
1297faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302faf6580fSSatish Balay         v += 25;
1303faf6580fSSatish Balay       }
1304faf6580fSSatish Balay     }
1305faf6580fSSatish Balay     break;
1306faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1307faf6580fSSatish Balay     default: {
1308faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309faf6580fSSatish Balay       if (!a->mult_work) {
1310faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1311faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1313faf6580fSSatish Balay       }
1314faf6580fSSatish Balay       work = a->mult_work;
1315faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1316faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1317faf6580fSSatish Balay         ncols = n*bs;
1318faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1319faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320faf6580fSSatish Balay         v += n*bs2;
1321faf6580fSSatish Balay         x += bs;
1322faf6580fSSatish Balay         workt = work;
1323faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1324faf6580fSSatish Balay           zb = z + bs*(*idx++);
1325faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326faf6580fSSatish Balay           workt += bs;
1327faf6580fSSatish Balay         }
1328faf6580fSSatish Balay       }
1329faf6580fSSatish Balay     }
1330faf6580fSSatish Balay   }
1331faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13342593348eSBarry Smith   return 0;
13352593348eSBarry Smith }
13362593348eSBarry Smith 
1337de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
13382593348eSBarry Smith {
1339b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13409df24120SSatish Balay   if (nz)  *nz  = a->bs2*a->nz;
1341bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
1342bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
13432593348eSBarry Smith   return 0;
13442593348eSBarry Smith }
13452593348eSBarry Smith 
134691d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
134791d316f6SSatish Balay {
134891d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
134991d316f6SSatish Balay 
135091d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
135191d316f6SSatish Balay 
135291d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
135391d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1354a7c10996SSatish Balay       (a->nz != b->nz)) {
135591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
135691d316f6SSatish Balay   }
135791d316f6SSatish Balay 
135891d316f6SSatish Balay   /* if the a->i are the same */
135991d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
136091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
136191d316f6SSatish Balay   }
136291d316f6SSatish Balay 
136391d316f6SSatish Balay   /* if a->j are the same */
136491d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
136591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
136691d316f6SSatish Balay   }
136791d316f6SSatish Balay 
136891d316f6SSatish Balay   /* if a->a are the same */
136991d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
137091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137191d316f6SSatish Balay   }
137291d316f6SSatish Balay   *flg = PETSC_TRUE;
137391d316f6SSatish Balay   return 0;
137491d316f6SSatish Balay 
137591d316f6SSatish Balay }
137691d316f6SSatish Balay 
137791d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
137891d316f6SSatish Balay {
137991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13807e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
138117e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
138217e48fc4SSatish Balay 
138317e48fc4SSatish Balay   bs   = a->bs;
138417e48fc4SSatish Balay   aa   = a->a;
138517e48fc4SSatish Balay   ai   = a->i;
138617e48fc4SSatish Balay   aj   = a->j;
138717e48fc4SSatish Balay   ambs = a->mbs;
13889df24120SSatish Balay   bs2  = a->bs2;
138991d316f6SSatish Balay 
139091d316f6SSatish Balay   VecSet(&zero,v);
139191d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
13929867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
139317e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
139417e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
139517e48fc4SSatish Balay       if (aj[j] == i) {
139617e48fc4SSatish Balay         row  = i*bs;
13977e67e3f9SSatish Balay         aa_j = aa+j*bs2;
13987e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
139991d316f6SSatish Balay         break;
140091d316f6SSatish Balay       }
140191d316f6SSatish Balay     }
140291d316f6SSatish Balay   }
140391d316f6SSatish Balay   return 0;
140491d316f6SSatish Balay }
140591d316f6SSatish Balay 
14069867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14079867e207SSatish Balay {
14089867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14099867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14107e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14119867e207SSatish Balay 
14129867e207SSatish Balay   ai  = a->i;
14139867e207SSatish Balay   aj  = a->j;
14149867e207SSatish Balay   aa  = a->a;
14159867e207SSatish Balay   m   = a->m;
14169867e207SSatish Balay   n   = a->n;
14179867e207SSatish Balay   bs  = a->bs;
14189867e207SSatish Balay   mbs = a->mbs;
14199df24120SSatish Balay   bs2 = a->bs2;
14209867e207SSatish Balay   if (ll) {
14219867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
14229867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14239867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14249867e207SSatish Balay       M  = ai[i+1] - ai[i];
14259867e207SSatish Balay       li = l + i*bs;
14267e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14279867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14287e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14299867e207SSatish Balay           (*v++) *= li[k%bs];
14309867e207SSatish Balay         }
14319867e207SSatish Balay       }
14329867e207SSatish Balay     }
14339867e207SSatish Balay   }
14349867e207SSatish Balay 
14359867e207SSatish Balay   if (rr) {
14369867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
14379867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14389867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14399867e207SSatish Balay       M  = ai[i+1] - ai[i];
14407e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14419867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14429867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14439867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14449867e207SSatish Balay           x = ri[k];
14459867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14469867e207SSatish Balay         }
14479867e207SSatish Balay       }
14489867e207SSatish Balay     }
14499867e207SSatish Balay   }
14509867e207SSatish Balay   return 0;
14519867e207SSatish Balay }
14529867e207SSatish Balay 
14539867e207SSatish Balay 
1454b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1455b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14562a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1457736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1458736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14591a6a6d98SBarry Smith 
14601a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14611a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14621a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14631a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14641a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14651a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14661a6a6d98SBarry Smith 
14677fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14687fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14697fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14707fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14717fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14727fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14732593348eSBarry Smith 
1474b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14752593348eSBarry Smith {
1476b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14772593348eSBarry Smith   Scalar      *v = a->a;
14782593348eSBarry Smith   double      sum = 0.0;
14799df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
14802593348eSBarry Smith 
14812593348eSBarry Smith   if (type == NORM_FROBENIUS) {
14820419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
14832593348eSBarry Smith #if defined(PETSC_COMPLEX)
14842593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
14852593348eSBarry Smith #else
14862593348eSBarry Smith       sum += (*v)*(*v); v++;
14872593348eSBarry Smith #endif
14882593348eSBarry Smith     }
14892593348eSBarry Smith     *norm = sqrt(sum);
14902593348eSBarry Smith   }
14912593348eSBarry Smith   else {
1492b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
14932593348eSBarry Smith   }
14942593348eSBarry Smith   return 0;
14952593348eSBarry Smith }
14962593348eSBarry Smith 
14972593348eSBarry Smith /*
14982593348eSBarry Smith      note: This can only work for identity for row and col. It would
14992593348eSBarry Smith    be good to check this and otherwise generate an error.
15002593348eSBarry Smith */
1501b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15022593348eSBarry Smith {
1503b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15042593348eSBarry Smith   Mat         outA;
1505de6a44a3SBarry Smith   int         ierr;
15062593348eSBarry Smith 
1507b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15082593348eSBarry Smith 
15092593348eSBarry Smith   outA          = inA;
15102593348eSBarry Smith   inA->factor   = FACTOR_LU;
15112593348eSBarry Smith   a->row        = row;
15122593348eSBarry Smith   a->col        = col;
15132593348eSBarry Smith 
1514de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15152593348eSBarry Smith 
15162593348eSBarry Smith   if (!a->diag) {
1517b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15182593348eSBarry Smith   }
15197fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15202593348eSBarry Smith   return 0;
15212593348eSBarry Smith }
15222593348eSBarry Smith 
1523b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15242593348eSBarry Smith {
1525b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15269df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1527b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1528b6490206SBarry Smith   PLogFlops(totalnz);
15292593348eSBarry Smith   return 0;
15302593348eSBarry Smith }
15312593348eSBarry Smith 
15327e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15337e67e3f9SSatish Balay {
15347e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15357e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1536a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15379df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15387e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15397e67e3f9SSatish Balay 
15407e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15417e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15427e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15437e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1544a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15457e67e3f9SSatish Balay     nrow = ailen[brow];
15467e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15477e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15487e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1549a7c10996SSatish Balay       col  = in[l] ;
15507e67e3f9SSatish Balay       bcol = col/bs;
15517e67e3f9SSatish Balay       cidx = col%bs;
15527e67e3f9SSatish Balay       ridx = row%bs;
15537e67e3f9SSatish Balay       high = nrow;
15547e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15557e67e3f9SSatish Balay       while (high-low > 5) {
15567e67e3f9SSatish Balay         t = (low+high)/2;
15577e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15587e67e3f9SSatish Balay         else             low  = t;
15597e67e3f9SSatish Balay       }
15607e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15617e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15627e67e3f9SSatish Balay         if (rp[i] == bcol) {
15637e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15647e67e3f9SSatish Balay           goto finished;
15657e67e3f9SSatish Balay         }
15667e67e3f9SSatish Balay       }
15677e67e3f9SSatish Balay       *v++ = zero;
15687e67e3f9SSatish Balay       finished:;
15697e67e3f9SSatish Balay     }
15707e67e3f9SSatish Balay   }
15717e67e3f9SSatish Balay   return 0;
15727e67e3f9SSatish Balay }
15737e67e3f9SSatish Balay 
15745a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15755a838052SSatish Balay {
15765a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
15775a838052SSatish Balay   *bs = baij->bs;
15785a838052SSatish Balay   return 0;
15795a838052SSatish Balay }
15805a838052SSatish Balay 
1581d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1582d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1583d9b7c43dSSatish Balay {
1584d9b7c43dSSatish Balay   int i,row;
1585d9b7c43dSSatish Balay   row = idx[0];
1586d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1587d9b7c43dSSatish Balay 
1588d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1589d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1590d9b7c43dSSatish Balay   }
1591d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1592d9b7c43dSSatish Balay   return 0;
1593d9b7c43dSSatish Balay }
1594d9b7c43dSSatish Balay 
1595d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1596d9b7c43dSSatish Balay {
1597d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1598d9b7c43dSSatish Balay   IS          is_local;
1599d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1600d9b7c43dSSatish Balay   PetscTruth  flg;
1601d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1602d9b7c43dSSatish Balay 
1603d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1604d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1605d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1606d9b7c43dSSatish Balay   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1607d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1608d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1609d9b7c43dSSatish Balay 
1610d9b7c43dSSatish Balay   i = 0;
1611d9b7c43dSSatish Balay   while (i < is_n) {
1612d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1613d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1614d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1615d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1616d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1617d9b7c43dSSatish Balay       i += bs;
1618d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1619d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1620d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1621d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1622d9b7c43dSSatish Balay         aa[0] = zero;
1623d9b7c43dSSatish Balay         aa+=bs;
1624d9b7c43dSSatish Balay       }
1625d9b7c43dSSatish Balay       i++;
1626d9b7c43dSSatish Balay     }
1627d9b7c43dSSatish Balay   }
1628d9b7c43dSSatish Balay   if (diag) {
1629d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1630d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1631d9b7c43dSSatish Balay     }
1632d9b7c43dSSatish Balay   }
1633d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1634d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1635d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1636d9b7c43dSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637d9b7c43dSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638d9b7c43dSSatish Balay 
1639d9b7c43dSSatish Balay   return 0;
1640d9b7c43dSSatish Balay }
1641ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1642ba4ca20aSSatish Balay {
1643ba4ca20aSSatish Balay   static int called = 0;
1644ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1645ba4ca20aSSatish Balay 
1646ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1647ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1648ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1649ba4ca20aSSatish Balay   return 0;
1650ba4ca20aSSatish Balay }
1651d9b7c43dSSatish Balay 
16522593348eSBarry Smith /* -------------------------------------------------------------------*/
1653cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16549867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1655f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1656faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16571a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1658b6490206SBarry Smith        0,0,
1659de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1660b6490206SBarry Smith        0,
1661f2501298SSatish Balay        MatTranspose_SeqBAIJ,
166217e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16639867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1664584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1665b6490206SBarry Smith        0,
1666d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1667b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
16687fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1669b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1670de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1671b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1672736121d4SSatish Balay        MatGetSubMatrix_SeqBAIJ,0,
1673b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1674b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1675af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16767e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1677ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16785a838052SSatish Balay        0,0,0,MatGetBlockSize_SeqBAIJ};
16792593348eSBarry Smith 
16802593348eSBarry Smith /*@C
168144cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
168244cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
168344cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
16842bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
16852bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
16862593348eSBarry Smith 
16872593348eSBarry Smith    Input Parameters:
16882593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1689b6490206SBarry Smith .  bs - size of block
16902593348eSBarry Smith .  m - number of rows
16912593348eSBarry Smith .  n - number of columns
1692b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
16932bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
16942bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
16952593348eSBarry Smith 
16962593348eSBarry Smith    Output Parameter:
16972593348eSBarry Smith .  A - the matrix
16982593348eSBarry Smith 
16992593348eSBarry Smith    Notes:
170044cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
17012593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
170244cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17032593348eSBarry Smith 
17042593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17052593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17062593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17072593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
17082593348eSBarry Smith 
170944cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17102593348eSBarry Smith @*/
1711b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17122593348eSBarry Smith {
17132593348eSBarry Smith   Mat         B;
1714b6490206SBarry Smith   Mat_SeqBAIJ *b;
1715f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1716b6490206SBarry Smith 
1717f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1718f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17192593348eSBarry Smith 
17202593348eSBarry Smith   *A = 0;
1721b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17222593348eSBarry Smith   PLogObjectCreate(B);
1723b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
172444cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17252593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17261a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17271a6a6d98SBarry Smith   if (!flg) {
17287fc0212eSBarry Smith     switch (bs) {
17297fc0212eSBarry Smith       case 1:
17307fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17311a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
173239b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1733f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17347fc0212eSBarry Smith        break;
17354eeb42bcSBarry Smith       case 2:
17364eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17371a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
173839b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1739f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17406d84be18SBarry Smith         break;
1741f327dd97SBarry Smith       case 3:
1742f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17431a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
174439b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1745f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17464eeb42bcSBarry Smith         break;
17476d84be18SBarry Smith       case 4:
17486d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17491a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
175039b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1751f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17526d84be18SBarry Smith         break;
17536d84be18SBarry Smith       case 5:
17546d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17551a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
175639b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1757f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17586d84be18SBarry Smith         break;
17597fc0212eSBarry Smith     }
17601a6a6d98SBarry Smith   }
1761b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1762b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17632593348eSBarry Smith   B->factor           = 0;
17642593348eSBarry Smith   B->lupivotthreshold = 1.0;
17652593348eSBarry Smith   b->row              = 0;
17662593348eSBarry Smith   b->col              = 0;
17672593348eSBarry Smith   b->reallocs         = 0;
17682593348eSBarry Smith 
176944cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
177044cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1771b6490206SBarry Smith   b->mbs     = mbs;
1772f2501298SSatish Balay   b->nbs     = nbs;
1773b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
17742593348eSBarry Smith   if (nnz == PETSC_NULL) {
1775119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
17762593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1777b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1778b6490206SBarry Smith     nz = nz*mbs;
17792593348eSBarry Smith   }
17802593348eSBarry Smith   else {
17812593348eSBarry Smith     nz = 0;
1782b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
17832593348eSBarry Smith   }
17842593348eSBarry Smith 
17852593348eSBarry Smith   /* allocate the matrix space */
17867e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
17872593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
17887e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
17897e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
17902593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
17912593348eSBarry Smith   b->i  = b->j + nz;
17922593348eSBarry Smith   b->singlemalloc = 1;
17932593348eSBarry Smith 
1794de6a44a3SBarry Smith   b->i[0] = 0;
1795b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
17962593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
17972593348eSBarry Smith   }
17982593348eSBarry Smith 
1799b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1800b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1801b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1802b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18032593348eSBarry Smith 
1804b6490206SBarry Smith   b->bs               = bs;
18059df24120SSatish Balay   b->bs2              = bs2;
1806b6490206SBarry Smith   b->mbs              = mbs;
18072593348eSBarry Smith   b->nz               = 0;
180818e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18092593348eSBarry Smith   b->sorted           = 0;
18102593348eSBarry Smith   b->roworiented      = 1;
18112593348eSBarry Smith   b->nonew            = 0;
18122593348eSBarry Smith   b->diag             = 0;
18132593348eSBarry Smith   b->solve_work       = 0;
1814de6a44a3SBarry Smith   b->mult_work        = 0;
18152593348eSBarry Smith   b->spptr            = 0;
18162593348eSBarry Smith   *A = B;
18172593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18182593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18192593348eSBarry Smith   return 0;
18202593348eSBarry Smith }
18212593348eSBarry Smith 
1822b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18232593348eSBarry Smith {
18242593348eSBarry Smith   Mat         C;
1825b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18269df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1827de6a44a3SBarry Smith 
1828de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18292593348eSBarry Smith 
18302593348eSBarry Smith   *B = 0;
1831b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18322593348eSBarry Smith   PLogObjectCreate(C);
1833b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18342593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1835b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1836b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18372593348eSBarry Smith   C->factor     = A->factor;
18382593348eSBarry Smith   c->row        = 0;
18392593348eSBarry Smith   c->col        = 0;
18402593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18412593348eSBarry Smith 
184244cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
184344cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
184444cd7ae7SLois Curfman McInnes   C->M          = a->m;
184544cd7ae7SLois Curfman McInnes   C->N          = a->n;
184644cd7ae7SLois Curfman McInnes 
1847b6490206SBarry Smith   c->bs         = a->bs;
18489df24120SSatish Balay   c->bs2        = a->bs2;
1849b6490206SBarry Smith   c->mbs        = a->mbs;
1850b6490206SBarry Smith   c->nbs        = a->nbs;
18512593348eSBarry Smith 
1852b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1853b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1854b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18552593348eSBarry Smith     c->imax[i] = a->imax[i];
18562593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18572593348eSBarry Smith   }
18582593348eSBarry Smith 
18592593348eSBarry Smith   /* allocate the matrix space */
18602593348eSBarry Smith   c->singlemalloc = 1;
18617e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18622593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18637e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1864de6a44a3SBarry Smith   c->i  = c->j + nz;
1865b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1866b6490206SBarry Smith   if (mbs > 0) {
1867de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18682593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18697e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18702593348eSBarry Smith     }
18712593348eSBarry Smith   }
18722593348eSBarry Smith 
1873b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
18742593348eSBarry Smith   c->sorted      = a->sorted;
18752593348eSBarry Smith   c->roworiented = a->roworiented;
18762593348eSBarry Smith   c->nonew       = a->nonew;
18772593348eSBarry Smith 
18782593348eSBarry Smith   if (a->diag) {
1879b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1880b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1881b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
18822593348eSBarry Smith       c->diag[i] = a->diag[i];
18832593348eSBarry Smith     }
18842593348eSBarry Smith   }
18852593348eSBarry Smith   else c->diag          = 0;
18862593348eSBarry Smith   c->nz                 = a->nz;
18872593348eSBarry Smith   c->maxnz              = a->maxnz;
18882593348eSBarry Smith   c->solve_work         = 0;
18892593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
18907fc0212eSBarry Smith   c->mult_work          = 0;
18912593348eSBarry Smith   *B = C;
18922593348eSBarry Smith   return 0;
18932593348eSBarry Smith }
18942593348eSBarry Smith 
189519bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
18962593348eSBarry Smith {
1897b6490206SBarry Smith   Mat_SeqBAIJ  *a;
18982593348eSBarry Smith   Mat          B;
1899de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1900b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
190135aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1902a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1903b6490206SBarry Smith   Scalar       *aa;
190419bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19052593348eSBarry Smith 
19061a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1907de6a44a3SBarry Smith   bs2  = bs*bs;
1908b6490206SBarry Smith 
19092593348eSBarry Smith   MPI_Comm_size(comm,&size);
1910b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
191190ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
191277c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1913de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19142593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19152593348eSBarry Smith 
1916b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
191735aab85fSBarry Smith 
191835aab85fSBarry Smith   /*
191935aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
192035aab85fSBarry Smith     divisible by the blocksize
192135aab85fSBarry Smith   */
1922b6490206SBarry Smith   mbs        = M/bs;
192335aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
192435aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
192535aab85fSBarry Smith   else                  mbs++;
192635aab85fSBarry Smith   if (extra_rows) {
19271a6a6d98SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
192835aab85fSBarry Smith   }
1929b6490206SBarry Smith 
19302593348eSBarry Smith   /* read in row lengths */
193135aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
193277c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
193335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19342593348eSBarry Smith 
1935b6490206SBarry Smith   /* read in column indices */
193635aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
193777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
193835aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1939b6490206SBarry Smith 
1940b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1941b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1942b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
194335aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
194435aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
194535aab85fSBarry Smith   masked = mask + mbs;
1946b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1947b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
194835aab85fSBarry Smith     nmask = 0;
1949b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1950b6490206SBarry Smith       kmax = rowlengths[rowcount];
1951b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
195235aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
195335aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1954b6490206SBarry Smith       }
1955b6490206SBarry Smith       rowcount++;
1956b6490206SBarry Smith     }
195735aab85fSBarry Smith     browlengths[i] += nmask;
195835aab85fSBarry Smith     /* zero out the mask elements we set */
195935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1960b6490206SBarry Smith   }
1961b6490206SBarry Smith 
19622593348eSBarry Smith   /* create our matrix */
196335aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
196435aab85fSBarry Smith          CHKERRQ(ierr);
19652593348eSBarry Smith   B = *A;
1966b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19672593348eSBarry Smith 
19682593348eSBarry Smith   /* set matrix "i" values */
1969de6a44a3SBarry Smith   a->i[0] = 0;
1970b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1971b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1972b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
19732593348eSBarry Smith   }
1974b6490206SBarry Smith   a->nz         = 0;
1975b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
19762593348eSBarry Smith 
1977b6490206SBarry Smith   /* read in nonzero values */
197835aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
197977c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
198035aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1981b6490206SBarry Smith 
1982b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1983b6490206SBarry Smith   nzcount = 0; jcount = 0;
1984b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1985b6490206SBarry Smith     nzcountb = nzcount;
198635aab85fSBarry Smith     nmask    = 0;
1987b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1988b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1989b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
199035aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
199135aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1992b6490206SBarry Smith       }
1993b6490206SBarry Smith       rowcount++;
1994b6490206SBarry Smith     }
1995de6a44a3SBarry Smith     /* sort the masked values */
199677c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1997de6a44a3SBarry Smith 
1998b6490206SBarry Smith     /* set "j" values into matrix */
1999b6490206SBarry Smith     maskcount = 1;
200035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
200135aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2002de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2003b6490206SBarry Smith     }
2004b6490206SBarry Smith     /* set "a" values into matrix */
2005de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2006b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2007b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2008b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2009de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2010de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2011de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2012de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2013b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2014b6490206SBarry Smith       }
2015b6490206SBarry Smith     }
201635aab85fSBarry Smith     /* zero out the mask elements we set */
201735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2018b6490206SBarry Smith   }
201935aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2020b6490206SBarry Smith 
2021b6490206SBarry Smith   PetscFree(rowlengths);
2022b6490206SBarry Smith   PetscFree(browlengths);
2023b6490206SBarry Smith   PetscFree(aa);
2024b6490206SBarry Smith   PetscFree(jj);
2025b6490206SBarry Smith   PetscFree(mask);
2026b6490206SBarry Smith 
2027b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2028de6a44a3SBarry Smith 
2029de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2030de6a44a3SBarry Smith   if (flg) {
203119bcc07fSBarry Smith     Viewer tviewer;
203219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
203390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
203419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
203519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2036de6a44a3SBarry Smith   }
2037de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2038de6a44a3SBarry Smith   if (flg) {
203919bcc07fSBarry Smith     Viewer tviewer;
204019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
204190ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
204219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
204319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2044de6a44a3SBarry Smith   }
2045de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2046de6a44a3SBarry Smith   if (flg) {
204719bcc07fSBarry Smith     Viewer tviewer;
204819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
204919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
205019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2051de6a44a3SBarry Smith   }
2052de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2053de6a44a3SBarry Smith   if (flg) {
205419bcc07fSBarry Smith     Viewer tviewer;
205519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
205690ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
205719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
205819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2059de6a44a3SBarry Smith   }
2060de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2061de6a44a3SBarry Smith   if (flg) {
206219bcc07fSBarry Smith     Viewer tviewer;
206319bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
206419bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
206519bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
206619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2067de6a44a3SBarry Smith   }
20682593348eSBarry Smith   return 0;
20692593348eSBarry Smith }
20702593348eSBarry Smith 
20712593348eSBarry Smith 
20722593348eSBarry Smith 
2073