xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3270192ae24025519c07c26ff54d472a0b327898)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*3270192aSSatish Balay static char vcid[] = "$Id: baij.c,v 1.61 1996/07/31 20:25:01 balay Exp balay $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
111a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
121a6a6d98SBarry Smith #include "src/inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
143b547af2SSatish Balay 
15bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17a2ce50c7SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41a7c10996SSatish Balay   ishift = 0;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
441a6a6d98SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
451a6a6d98SBarry Smith     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
462593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
47bcd2baecSBarry Smith   } else {
48bcd2baecSBarry Smith     if (ishift == oshift) {
491a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50bcd2baecSBarry Smith     }
51bcd2baecSBarry Smith     else if (ishift == -1) {
52bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
531a6a6d98SBarry Smith       int nz = a->i[n] - 1;
54bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
551a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
561a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
581a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
59bcd2baecSBarry Smith     } else {
60bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
611a6a6d98SBarry Smith       int nz = a->i[n] - 1;
62bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
631a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
641a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
661a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
67bcd2baecSBarry Smith     }
68bcd2baecSBarry Smith   }
692593348eSBarry Smith   return 0;
702593348eSBarry Smith }
712593348eSBarry Smith 
72de6a44a3SBarry Smith /*
73de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
74de6a44a3SBarry Smith */
75de6a44a3SBarry Smith 
76de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
77de6a44a3SBarry Smith {
78de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
797fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
80de6a44a3SBarry Smith 
81de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
837fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
84de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85de6a44a3SBarry Smith       if (a->j[j] == i) {
86de6a44a3SBarry Smith         diag[i] = j;
87de6a44a3SBarry Smith         break;
88de6a44a3SBarry Smith       }
89de6a44a3SBarry Smith     }
90de6a44a3SBarry Smith   }
91de6a44a3SBarry Smith   a->diag = diag;
92de6a44a3SBarry Smith   return 0;
93de6a44a3SBarry Smith }
942593348eSBarry Smith 
952593348eSBarry Smith #include "draw.h"
962593348eSBarry Smith #include "pinclude/pviewer.h"
9777c4ece6SBarry Smith #include "sys.h"
982593348eSBarry Smith 
99b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1002593348eSBarry Smith {
101b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1029df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103b6490206SBarry Smith   Scalar      *aa;
1042593348eSBarry Smith 
10590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1062593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1072593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1082593348eSBarry Smith   col_lens[1] = a->m;
1092593348eSBarry Smith   col_lens[2] = a->n;
1107e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1112593348eSBarry Smith 
1122593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
113b6490206SBarry Smith   count = 0;
114b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
115b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
116b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117b6490206SBarry Smith     }
1182593348eSBarry Smith   }
11977c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1202593348eSBarry Smith   PetscFree(col_lens);
1212593348eSBarry Smith 
1222593348eSBarry Smith   /* store column indices (zero start index) */
1237e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124b6490206SBarry Smith   count = 0;
125b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
126b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
127b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
129b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1302593348eSBarry Smith         }
1312593348eSBarry Smith       }
132b6490206SBarry Smith     }
133b6490206SBarry Smith   }
1347e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135b6490206SBarry Smith   PetscFree(jj);
1362593348eSBarry Smith 
1372593348eSBarry Smith   /* store nonzero values */
1387e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139b6490206SBarry Smith   count = 0;
140b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
141b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
142b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1447e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
145b6490206SBarry Smith         }
146b6490206SBarry Smith       }
147b6490206SBarry Smith     }
148b6490206SBarry Smith   }
1497e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150b6490206SBarry Smith   PetscFree(aa);
1512593348eSBarry Smith   return 0;
1522593348eSBarry Smith }
1532593348eSBarry Smith 
154b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1552593348eSBarry Smith {
156b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1579df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1582593348eSBarry Smith   FILE        *fd;
1592593348eSBarry Smith   char        *outputname;
1602593348eSBarry Smith 
16190ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1622593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16390ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1647ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1657ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1662593348eSBarry Smith   }
16790ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
168b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1692593348eSBarry Smith   }
17044cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17144cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17244cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17344cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17444cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17544cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17644cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17744cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17844cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17944cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18144cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18244cd7ae7SLois Curfman McInnes #else
18344cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18444cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18544cd7ae7SLois Curfman McInnes #endif
18644cd7ae7SLois Curfman McInnes           }
18744cd7ae7SLois Curfman McInnes         }
18844cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18944cd7ae7SLois Curfman McInnes       }
19044cd7ae7SLois Curfman McInnes     }
19144cd7ae7SLois Curfman McInnes   }
1922593348eSBarry Smith   else {
193b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
194b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
195b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
196b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19888685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1997e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20088685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2017e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20288685aaeSLois Curfman McInnes           }
20388685aaeSLois Curfman McInnes           else {
2047e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20588685aaeSLois Curfman McInnes           }
20688685aaeSLois Curfman McInnes #else
2077e67e3f9SSatish Balay             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20888685aaeSLois Curfman McInnes #endif
2092593348eSBarry Smith           }
2102593348eSBarry Smith         }
2112593348eSBarry Smith         fprintf(fd,"\n");
2122593348eSBarry Smith       }
2132593348eSBarry Smith     }
214b6490206SBarry Smith   }
2152593348eSBarry Smith   fflush(fd);
2162593348eSBarry Smith   return 0;
2172593348eSBarry Smith }
2182593348eSBarry Smith 
219*3270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
220*3270192aSSatish Balay {
221*3270192aSSatish Balay   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
222*3270192aSSatish Balay   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
223*3270192aSSatish Balay   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
224*3270192aSSatish Balay   Scalar       *aa;
225*3270192aSSatish Balay   Draw         draw;
226*3270192aSSatish Balay   DrawButton   button;
227*3270192aSSatish Balay   PetscTruth   isnull;
228*3270192aSSatish Balay 
229*3270192aSSatish Balay   ViewerDrawGetDraw(viewer,&draw);
230*3270192aSSatish Balay   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
231*3270192aSSatish Balay 
232*3270192aSSatish Balay   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
233*3270192aSSatish Balay   xr += w;    yr += h;  xl = -w;     yl = -h;
234*3270192aSSatish Balay   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
235*3270192aSSatish Balay   /* loop over matrix elements drawing boxes */
236*3270192aSSatish Balay   color = DRAW_BLUE;
237*3270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
238*3270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
239*3270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
240*3270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
241*3270192aSSatish Balay       aa = a->a + j*bs2;
242*3270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
243*3270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
244*3270192aSSatish Balay           if (PetscReal(*aa++) >=  0.) continue;
245*3270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
246*3270192aSSatish Balay         }
247*3270192aSSatish Balay       }
248*3270192aSSatish Balay     }
249*3270192aSSatish Balay   }
250*3270192aSSatish Balay   color = DRAW_CYAN;
251*3270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
252*3270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
253*3270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
254*3270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
255*3270192aSSatish Balay       aa = a->a + j*bs2;
256*3270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
257*3270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
258*3270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
259*3270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
260*3270192aSSatish Balay         }
261*3270192aSSatish Balay       }
262*3270192aSSatish Balay     }
263*3270192aSSatish Balay   }
264*3270192aSSatish Balay 
265*3270192aSSatish Balay   color = DRAW_RED;
266*3270192aSSatish Balay   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
267*3270192aSSatish Balay     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
268*3270192aSSatish Balay       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
269*3270192aSSatish Balay       x_l = a->j[j]*bs; x_r = x_l + 1.0;
270*3270192aSSatish Balay       aa = a->a + j*bs2;
271*3270192aSSatish Balay       for ( k=0; k<bs; k++ ) {
272*3270192aSSatish Balay         for ( l=0; l<bs; l++ ) {
273*3270192aSSatish Balay           if (PetscReal(*aa++) <= 0.) continue;
274*3270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
275*3270192aSSatish Balay         }
276*3270192aSSatish Balay       }
277*3270192aSSatish Balay     }
278*3270192aSSatish Balay   }
279*3270192aSSatish Balay 
280*3270192aSSatish Balay   DrawFlush(draw);
281*3270192aSSatish Balay   DrawGetPause(draw,&pause);
282*3270192aSSatish Balay   if (pause >= 0) { PetscSleep(pause); return 0;}
283*3270192aSSatish Balay 
284*3270192aSSatish Balay   /* allow the matrix to zoom or shrink */
285*3270192aSSatish Balay   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
286*3270192aSSatish Balay   while (button != BUTTON_RIGHT) {
287*3270192aSSatish Balay     DrawClear(draw);
288*3270192aSSatish Balay     if (button == BUTTON_LEFT) scale = .5;
289*3270192aSSatish Balay     else if (button == BUTTON_CENTER) scale = 2.;
290*3270192aSSatish Balay     xl = scale*(xl + w - xc) + xc - w*scale;
291*3270192aSSatish Balay     xr = scale*(xr - w - xc) + xc + w*scale;
292*3270192aSSatish Balay     yl = scale*(yl + h - yc) + yc - h*scale;
293*3270192aSSatish Balay     yr = scale*(yr - h - yc) + yc + h*scale;
294*3270192aSSatish Balay     w *= scale; h *= scale;
295*3270192aSSatish Balay     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
296*3270192aSSatish Balay     color = DRAW_BLUE;
297*3270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
298*3270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
299*3270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
300*3270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
301*3270192aSSatish Balay         aa = a->a + j*bs2;
302*3270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
303*3270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
304*3270192aSSatish Balay             if (PetscReal(*aa++) >=  0.) continue;
305*3270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
306*3270192aSSatish Balay           }
307*3270192aSSatish Balay         }
308*3270192aSSatish Balay       }
309*3270192aSSatish Balay     }
310*3270192aSSatish Balay     color = DRAW_CYAN;
311*3270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
312*3270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
313*3270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
314*3270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
315*3270192aSSatish Balay         aa = a->a + j*bs2;
316*3270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
317*3270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
318*3270192aSSatish Balay           if (PetscReal(*aa++) != 0.) continue;
319*3270192aSSatish Balay           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
320*3270192aSSatish Balay           }
321*3270192aSSatish Balay         }
322*3270192aSSatish Balay       }
323*3270192aSSatish Balay     }
324*3270192aSSatish Balay 
325*3270192aSSatish Balay     color = DRAW_RED;
326*3270192aSSatish Balay     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
327*3270192aSSatish Balay       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
328*3270192aSSatish Balay         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
329*3270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
330*3270192aSSatish Balay         aa = a->a + j*bs2;
331*3270192aSSatish Balay         for ( k=0; k<bs; k++ ) {
332*3270192aSSatish Balay           for ( l=0; l<bs; l++ ) {
333*3270192aSSatish Balay             if (PetscReal(*aa++) <= 0.) continue;
334*3270192aSSatish Balay             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
335*3270192aSSatish Balay           }
336*3270192aSSatish Balay         }
337*3270192aSSatish Balay       }
338*3270192aSSatish Balay     }
339*3270192aSSatish Balay     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
340*3270192aSSatish Balay   }
341*3270192aSSatish Balay   return 0;
342*3270192aSSatish Balay }
343*3270192aSSatish Balay 
344b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
3452593348eSBarry Smith {
3462593348eSBarry Smith   Mat         A = (Mat) obj;
34719bcc07fSBarry Smith   ViewerType  vtype;
34819bcc07fSBarry Smith   int         ierr;
3492593348eSBarry Smith 
35019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
35119bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
352b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
3532593348eSBarry Smith   }
35419bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
355b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
3562593348eSBarry Smith   }
35719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
358b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
3592593348eSBarry Smith   }
36019bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
361*3270192aSSatish Balay     return MatView_SeqBAIJ_Draw(A,viewer);
3622593348eSBarry Smith   }
3632593348eSBarry Smith   return 0;
3642593348eSBarry Smith }
365b6490206SBarry Smith 
366119a934aSSatish Balay #define CHUNKSIZE  10
367cd0e1443SSatish Balay 
368cd0e1443SSatish Balay /* This version has row oriented v  */
369cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
370cd0e1443SSatish Balay {
371cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
372cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
373cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
374a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
3759df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
376cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
377cd0e1443SSatish Balay 
378cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
379cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
380cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
381cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
382a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
383cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
384cd0e1443SSatish Balay     low = 0;
385cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
386cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
387cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
388a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
389cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
390cd0e1443SSatish Balay       if (roworiented) {
391cd0e1443SSatish Balay         value = *v++;
392cd0e1443SSatish Balay       }
393cd0e1443SSatish Balay       else {
394cd0e1443SSatish Balay         value = v[k + l*m];
395cd0e1443SSatish Balay       }
396cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
397cd0e1443SSatish Balay       while (high-low > 5) {
398cd0e1443SSatish Balay         t = (low+high)/2;
399cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
400cd0e1443SSatish Balay         else             low  = t;
401cd0e1443SSatish Balay       }
402cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
403cd0e1443SSatish Balay         if (rp[i] > bcol) break;
404cd0e1443SSatish Balay         if (rp[i] == bcol) {
4057e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
406cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
407cd0e1443SSatish Balay           else                  *bap  = value;
408cd0e1443SSatish Balay           goto noinsert;
409cd0e1443SSatish Balay         }
410cd0e1443SSatish Balay       }
411cd0e1443SSatish Balay       if (nonew) goto noinsert;
412cd0e1443SSatish Balay       if (nrow >= rmax) {
413cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
414cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
415cd0e1443SSatish Balay         Scalar *new_a;
416cd0e1443SSatish Balay 
417cd0e1443SSatish Balay         /* malloc new storage space */
4187e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
419cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
4207e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
421cd0e1443SSatish Balay         new_i   = new_j + new_nz;
422cd0e1443SSatish Balay 
423cd0e1443SSatish Balay         /* copy over old data into new slots */
424cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
4257e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
426a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
427a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
428a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
429cd0e1443SSatish Balay                                                            len*sizeof(int));
430a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
431a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
432a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
433a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
434cd0e1443SSatish Balay         /* free up old matrix storage */
435cd0e1443SSatish Balay         PetscFree(a->a);
436cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
437cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
438cd0e1443SSatish Balay         a->singlemalloc = 1;
439cd0e1443SSatish Balay 
440a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
441cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
4427e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
44318e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
444cd0e1443SSatish Balay         a->reallocs++;
445119a934aSSatish Balay         a->nz++;
446cd0e1443SSatish Balay       }
4477e67e3f9SSatish Balay       N = nrow++ - 1;
448cd0e1443SSatish Balay       /* shift up all the later entries in this row */
449cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
450cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
4517e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
452cd0e1443SSatish Balay       }
4537e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
454cd0e1443SSatish Balay       rp[i] = bcol;
4557e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
456cd0e1443SSatish Balay       noinsert:;
457cd0e1443SSatish Balay       low = i;
458cd0e1443SSatish Balay     }
459cd0e1443SSatish Balay     ailen[brow] = nrow;
460cd0e1443SSatish Balay   }
461cd0e1443SSatish Balay   return 0;
462cd0e1443SSatish Balay }
463cd0e1443SSatish Balay 
4640b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
4650b824a48SSatish Balay {
4660b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4670b824a48SSatish Balay   *m = a->m; *n = a->n;
4680b824a48SSatish Balay   return 0;
4690b824a48SSatish Balay }
4700b824a48SSatish Balay 
4710b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
4720b824a48SSatish Balay {
4730b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4740b824a48SSatish Balay   *m = 0; *n = a->m;
4750b824a48SSatish Balay   return 0;
4760b824a48SSatish Balay }
4770b824a48SSatish Balay 
4789867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4799867e207SSatish Balay {
4809867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
4817e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
4829867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
4839867e207SSatish Balay 
4849867e207SSatish Balay   bs  = a->bs;
4859867e207SSatish Balay   ai  = a->i;
4869867e207SSatish Balay   aj  = a->j;
4879867e207SSatish Balay   aa  = a->a;
4889df24120SSatish Balay   bs2 = a->bs2;
4899867e207SSatish Balay 
4909867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
4919867e207SSatish Balay 
4929867e207SSatish Balay   bn  = row/bs;   /* Block number */
4939867e207SSatish Balay   bp  = row % bs; /* Block Position */
4949867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
4959867e207SSatish Balay   *nz = bs*M;
4969867e207SSatish Balay 
4979867e207SSatish Balay   if (v) {
4989867e207SSatish Balay     *v = 0;
4999867e207SSatish Balay     if (*nz) {
5009867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
5019867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5029867e207SSatish Balay         v_i  = *v + i*bs;
5037e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
5047e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
5059867e207SSatish Balay       }
5069867e207SSatish Balay     }
5079867e207SSatish Balay   }
5089867e207SSatish Balay 
5099867e207SSatish Balay   if (idx) {
5109867e207SSatish Balay     *idx = 0;
5119867e207SSatish Balay     if (*nz) {
5129867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
5139867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
5149867e207SSatish Balay         idx_i = *idx + i*bs;
5159867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
5169867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
5179867e207SSatish Balay       }
5189867e207SSatish Balay     }
5199867e207SSatish Balay   }
5209867e207SSatish Balay   return 0;
5219867e207SSatish Balay }
5229867e207SSatish Balay 
5239867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
5249867e207SSatish Balay {
5259867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
5269867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
5279867e207SSatish Balay   return 0;
5289867e207SSatish Balay }
529b6490206SBarry Smith 
530f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
531f2501298SSatish Balay {
532f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
533f2501298SSatish Balay   Mat         C;
534f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
5359df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
536f2501298SSatish Balay   Scalar      *array=a->a;
537f2501298SSatish Balay 
538f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
539f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
540f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
541f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
542a7c10996SSatish Balay 
543a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
544f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
545f2501298SSatish Balay   PetscFree(col);
546f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
547f2501298SSatish Balay   cols = rows + bs;
548f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
549f2501298SSatish Balay     cols[0] = i*bs;
550f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
551f2501298SSatish Balay     len = ai[i+1] - ai[i];
552f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
553f2501298SSatish Balay       rows[0] = (*aj++)*bs;
554f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
555f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
556f2501298SSatish Balay       array += bs2;
557f2501298SSatish Balay     }
558f2501298SSatish Balay   }
5591073c447SSatish Balay   PetscFree(rows);
560f2501298SSatish Balay 
5616d4a8577SBarry Smith   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5626d4a8577SBarry Smith   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
563f2501298SSatish Balay 
564f2501298SSatish Balay   if (B != PETSC_NULL) {
565f2501298SSatish Balay     *B = C;
566f2501298SSatish Balay   } else {
567f2501298SSatish Balay     /* This isn't really an in-place transpose */
568f2501298SSatish Balay     PetscFree(a->a);
569f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
570f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
571f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
572f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
573f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
574f2501298SSatish Balay     PetscFree(a);
575f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
576f2501298SSatish Balay     PetscHeaderDestroy(C);
577f2501298SSatish Balay   }
578f2501298SSatish Balay   return 0;
579f2501298SSatish Balay }
580f2501298SSatish Balay 
581f2501298SSatish Balay 
582584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
583584200bdSSatish Balay {
584584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
585584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
586a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
5879df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
588584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
589584200bdSSatish Balay 
5906d4a8577SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
591584200bdSSatish Balay 
592584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
593584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
594584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
595584200bdSSatish Balay     if (fshift) {
596a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
597584200bdSSatish Balay       N = ailen[i];
598584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
599584200bdSSatish Balay         ip[j-fshift] = ip[j];
6007e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
601584200bdSSatish Balay       }
602584200bdSSatish Balay     }
603584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
604584200bdSSatish Balay   }
605584200bdSSatish Balay   if (mbs) {
606584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
607584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
608584200bdSSatish Balay   }
609584200bdSSatish Balay   /* reset ilen and imax for each row */
610584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
611584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
612584200bdSSatish Balay   }
613a7c10996SSatish Balay   a->nz = ai[mbs];
614584200bdSSatish Balay 
615584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
616584200bdSSatish Balay   if (fshift && a->diag) {
617584200bdSSatish Balay     PetscFree(a->diag);
618584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
619584200bdSSatish Balay     a->diag = 0;
620584200bdSSatish Balay   }
62167790700SSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
622584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
623584200bdSSatish Balay            a->reallocs);
624584200bdSSatish Balay   return 0;
625584200bdSSatish Balay }
626584200bdSSatish Balay 
627b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
6282593348eSBarry Smith {
629b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6309df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
6312593348eSBarry Smith   return 0;
6322593348eSBarry Smith }
6332593348eSBarry Smith 
634b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
6352593348eSBarry Smith {
6362593348eSBarry Smith   Mat         A  = (Mat) obj;
637b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6382593348eSBarry Smith 
6392593348eSBarry Smith #if defined(PETSC_LOG)
6402593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
6412593348eSBarry Smith #endif
6422593348eSBarry Smith   PetscFree(a->a);
6432593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
6442593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
6452593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
6462593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
6472593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
648de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
6492593348eSBarry Smith   PetscFree(a);
650f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
651f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
6522593348eSBarry Smith   return 0;
6532593348eSBarry Smith }
6542593348eSBarry Smith 
655b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
6562593348eSBarry Smith {
657b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
6586d4a8577SBarry Smith   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
6596d4a8577SBarry Smith   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
6606d4a8577SBarry Smith   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
6616d4a8577SBarry Smith   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
6626d4a8577SBarry Smith   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
6636d4a8577SBarry Smith   else if (op == MAT_ROWS_SORTED ||
6646d4a8577SBarry Smith            op == MAT_SYMMETRIC ||
6656d4a8577SBarry Smith            op == MAT_STRUCTURALLY_SYMMETRIC ||
6666d4a8577SBarry Smith            op == MAT_YES_NEW_DIAGONALS)
66794a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
6686d4a8577SBarry Smith   else if (op == MAT_NO_NEW_DIAGONALS)
6696d4a8577SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
6702593348eSBarry Smith   else
671b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
6722593348eSBarry Smith   return 0;
6732593348eSBarry Smith }
6742593348eSBarry Smith 
6752593348eSBarry Smith 
6762593348eSBarry Smith /* -------------------------------------------------------*/
6772593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
6782593348eSBarry Smith /* -------------------------------------------------------*/
679b6490206SBarry Smith #include "pinclude/plapack.h"
680b6490206SBarry Smith 
68139b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
6822593348eSBarry Smith {
683b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
6841a6a6d98SBarry Smith   Scalar          *xg,*zg;
68539b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
68639b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
6872593348eSBarry Smith 
688bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690b6490206SBarry Smith 
691119a934aSSatish Balay   idx   = a->j;
692119a934aSSatish Balay   v     = a->a;
693119a934aSSatish Balay   ii    = a->i;
694119a934aSSatish Balay 
695119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
696119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
697119a934aSSatish Balay     sum  = 0.0;
698119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
6991a6a6d98SBarry Smith     z[i] = sum;
700119a934aSSatish Balay   }
70139b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
70239b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
70339b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
70439b95ed1SBarry Smith   return 0;
70539b95ed1SBarry Smith }
70639b95ed1SBarry Smith 
70739b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
70839b95ed1SBarry Smith {
70939b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
71039b95ed1SBarry Smith   Scalar          *xg,*zg;
71139b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
71239b95ed1SBarry Smith   register Scalar x1,x2;
71339b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
71439b95ed1SBarry Smith   int             j,n,ierr;
71539b95ed1SBarry Smith 
71639b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
71739b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
71839b95ed1SBarry Smith 
71939b95ed1SBarry Smith   idx   = a->j;
72039b95ed1SBarry Smith   v     = a->a;
72139b95ed1SBarry Smith   ii    = a->i;
72239b95ed1SBarry Smith 
723119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
724119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
725119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
726119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
727119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
728119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
729119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
730119a934aSSatish Balay       v += 4;
731119a934aSSatish Balay     }
7321a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
733119a934aSSatish Balay     z += 2;
734119a934aSSatish Balay   }
73539b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
73639b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
73739b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
73839b95ed1SBarry Smith   return 0;
73939b95ed1SBarry Smith }
74039b95ed1SBarry Smith 
74139b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
74239b95ed1SBarry Smith {
74339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
74439b95ed1SBarry Smith   Scalar          *xg,*zg;
74539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
74639b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
74739b95ed1SBarry Smith 
74839b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
74939b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
75039b95ed1SBarry Smith 
75139b95ed1SBarry Smith   idx   = a->j;
75239b95ed1SBarry Smith   v     = a->a;
75339b95ed1SBarry Smith   ii    = a->i;
75439b95ed1SBarry Smith 
755119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
756119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
757119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
758119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
759119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
760119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
761119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
762119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
763119a934aSSatish Balay       v += 9;
764119a934aSSatish Balay     }
7651a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
766119a934aSSatish Balay     z += 3;
767119a934aSSatish Balay   }
76839b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
76939b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
77039b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
77139b95ed1SBarry Smith   return 0;
77239b95ed1SBarry Smith }
77339b95ed1SBarry Smith 
77439b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
77539b95ed1SBarry Smith {
77639b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
77739b95ed1SBarry Smith   Scalar          *xg,*zg;
77839b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
77939b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
78039b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
78139b95ed1SBarry Smith   int             j,n,ierr;
78239b95ed1SBarry Smith 
78339b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
78439b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
78539b95ed1SBarry Smith 
78639b95ed1SBarry Smith   idx   = a->j;
78739b95ed1SBarry Smith   v     = a->a;
78839b95ed1SBarry Smith   ii    = a->i;
78939b95ed1SBarry Smith 
790119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
791119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
792119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
793119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
794119a934aSSatish Balay       xb = x + 4*(*idx++);
795119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
796119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
797119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
798119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
799119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
800119a934aSSatish Balay       v += 16;
801119a934aSSatish Balay     }
8021a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
803119a934aSSatish Balay     z += 4;
804119a934aSSatish Balay   }
80539b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
80639b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
80739b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
80839b95ed1SBarry Smith   return 0;
80939b95ed1SBarry Smith }
81039b95ed1SBarry Smith 
81139b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
81239b95ed1SBarry Smith {
81339b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
81439b95ed1SBarry Smith   Scalar          *xg,*zg;
81539b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
81639b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
81739b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
81839b95ed1SBarry Smith 
81939b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
82039b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
82139b95ed1SBarry Smith 
82239b95ed1SBarry Smith   idx   = a->j;
82339b95ed1SBarry Smith   v     = a->a;
82439b95ed1SBarry Smith   ii    = a->i;
82539b95ed1SBarry Smith 
826119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
827119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
828119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
829119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
830119a934aSSatish Balay       xb = x + 5*(*idx++);
831119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
832119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
833119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
834119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
835119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
836119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
837119a934aSSatish Balay       v += 25;
838119a934aSSatish Balay     }
8391a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
840119a934aSSatish Balay     z += 5;
841119a934aSSatish Balay   }
84239b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
84339b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
84439b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
84539b95ed1SBarry Smith   return 0;
84639b95ed1SBarry Smith }
84739b95ed1SBarry Smith 
84839b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
84939b95ed1SBarry Smith {
85039b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
85139b95ed1SBarry Smith   Scalar          *xg,*zg;
85239b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
85339b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
8541a6a6d98SBarry Smith   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
85539b95ed1SBarry Smith 
85639b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
85739b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
85839b95ed1SBarry Smith 
85939b95ed1SBarry Smith   idx   = a->j;
86039b95ed1SBarry Smith   v     = a->a;
86139b95ed1SBarry Smith   ii    = a->i;
86239b95ed1SBarry Smith 
86339b95ed1SBarry Smith 
864119a934aSSatish Balay   if (!a->mult_work) {
8653b547af2SSatish Balay     k = PetscMax(a->m,a->n);
8662b3076dcSSatish Balay     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
867119a934aSSatish Balay   }
868119a934aSSatish Balay   work = a->mult_work;
869119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
870119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
871119a934aSSatish Balay     ncols = n*bs;
872119a934aSSatish Balay     workt = work;
873119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
874119a934aSSatish Balay       xb = x + bs*(*idx++);
875119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
876119a934aSSatish Balay       workt += bs;
877119a934aSSatish Balay     }
8781a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
879119a934aSSatish Balay     v += n*bs2;
880119a934aSSatish Balay     z += bs;
881119a934aSSatish Balay   }
8820419e6cdSSatish Balay    ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
8830419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
8841a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
885bb42667fSSatish Balay   return 0;
886bb42667fSSatish Balay }
887bb42667fSSatish Balay 
888f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
889f44d3a6dSSatish Balay {
890f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
891f44d3a6dSSatish Balay   Scalar          *xg,*yg,*zg;
892f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,sum;
893f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
894f44d3a6dSSatish Balay 
895f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
896f44d3a6dSSatish Balay   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
897f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
898f44d3a6dSSatish Balay 
899f44d3a6dSSatish Balay   idx   = a->j;
900f44d3a6dSSatish Balay   v     = a->a;
901f44d3a6dSSatish Balay   ii    = a->i;
902f44d3a6dSSatish Balay 
903f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
904f44d3a6dSSatish Balay     n    = ii[1] - ii[0]; ii++;
905f44d3a6dSSatish Balay     sum  = y[i];
906f44d3a6dSSatish Balay     while (n--) sum += *v++ * x[*idx++];
907f44d3a6dSSatish Balay     z[i] = sum;
908f44d3a6dSSatish Balay   }
909f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
910f44d3a6dSSatish Balay   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
911f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
912f44d3a6dSSatish Balay   PLogFlops(2*a->nz);
913f44d3a6dSSatish Balay   return 0;
914f44d3a6dSSatish Balay }
915f44d3a6dSSatish Balay 
916f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
917f44d3a6dSSatish Balay {
918f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
919f44d3a6dSSatish Balay   Scalar          *xg,*yg,*zg;
920f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
921f44d3a6dSSatish Balay   register Scalar x1,x2;
922f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
923f44d3a6dSSatish Balay   int             j,n,ierr;
924f44d3a6dSSatish Balay 
925f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
926f44d3a6dSSatish Balay   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
927f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
928f44d3a6dSSatish Balay 
929f44d3a6dSSatish Balay   idx   = a->j;
930f44d3a6dSSatish Balay   v     = a->a;
931f44d3a6dSSatish Balay   ii    = a->i;
932f44d3a6dSSatish Balay 
933f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
934f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
935f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1];
936f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
937f44d3a6dSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
938f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
939f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
940f44d3a6dSSatish Balay       v += 4;
941f44d3a6dSSatish Balay     }
942f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2;
943f44d3a6dSSatish Balay     z += 2; y += 2;
944f44d3a6dSSatish Balay   }
945f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
946f44d3a6dSSatish Balay   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
947f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
948f44d3a6dSSatish Balay   PLogFlops(4*a->nz);
949f44d3a6dSSatish Balay   return 0;
950f44d3a6dSSatish Balay }
951f44d3a6dSSatish Balay 
952f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
953f44d3a6dSSatish Balay {
954f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
955f44d3a6dSSatish Balay   Scalar          *xg,*yg,*zg;
956f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
957f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
958f44d3a6dSSatish Balay 
959f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
960f44d3a6dSSatish Balay   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
961f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
962f44d3a6dSSatish Balay 
963f44d3a6dSSatish Balay   idx   = a->j;
964f44d3a6dSSatish Balay   v     = a->a;
965f44d3a6dSSatish Balay   ii    = a->i;
966f44d3a6dSSatish Balay 
967f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
968f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
969f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
970f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
971f44d3a6dSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
972f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
973f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
974f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
975f44d3a6dSSatish Balay       v += 9;
976f44d3a6dSSatish Balay     }
977f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3;
978f44d3a6dSSatish Balay     z += 3; y += 3;
979f44d3a6dSSatish Balay   }
980f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
981f44d3a6dSSatish Balay   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
982f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
983f44d3a6dSSatish Balay   PLogFlops(18*a->nz);
984f44d3a6dSSatish Balay   return 0;
985f44d3a6dSSatish Balay }
986f44d3a6dSSatish Balay 
987f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
988f44d3a6dSSatish Balay {
989f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
990f44d3a6dSSatish Balay   Scalar          *xg,*yg,*zg;
991f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
992f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4;
993f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii;
994f44d3a6dSSatish Balay   int             j,n,ierr;
995f44d3a6dSSatish Balay 
996f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
997f44d3a6dSSatish Balay   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
998f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
999f44d3a6dSSatish Balay 
1000f44d3a6dSSatish Balay   idx   = a->j;
1001f44d3a6dSSatish Balay   v     = a->a;
1002f44d3a6dSSatish Balay   ii    = a->i;
1003f44d3a6dSSatish Balay 
1004f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1005f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1006f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1007f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1008f44d3a6dSSatish Balay       xb = x + 4*(*idx++);
1009f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1010f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1011f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1012f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1013f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1014f44d3a6dSSatish Balay       v += 16;
1015f44d3a6dSSatish Balay     }
1016f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1017f44d3a6dSSatish Balay     z += 4; y += 4;
1018f44d3a6dSSatish Balay   }
1019f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1020f44d3a6dSSatish Balay   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
1021f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1022f44d3a6dSSatish Balay   PLogFlops(32*a->nz);
1023f44d3a6dSSatish Balay   return 0;
1024f44d3a6dSSatish Balay }
1025f44d3a6dSSatish Balay 
1026f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1027f44d3a6dSSatish Balay {
1028f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1029f44d3a6dSSatish Balay   Scalar          *xg,*yg,*zg;
1030f44d3a6dSSatish Balay   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1031f44d3a6dSSatish Balay   register Scalar x1,x2,x3,x4,x5;
1032f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
1033f44d3a6dSSatish Balay 
1034f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1035f44d3a6dSSatish Balay   ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg;
1036f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1037f44d3a6dSSatish Balay 
1038f44d3a6dSSatish Balay   idx   = a->j;
1039f44d3a6dSSatish Balay   v     = a->a;
1040f44d3a6dSSatish Balay   ii    = a->i;
1041f44d3a6dSSatish Balay 
1042f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1043f44d3a6dSSatish Balay     n  = ii[1] - ii[0]; ii++;
1044f44d3a6dSSatish Balay     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1045f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1046f44d3a6dSSatish Balay       xb = x + 5*(*idx++);
1047f44d3a6dSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1048f44d3a6dSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1049f44d3a6dSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1050f44d3a6dSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1051f44d3a6dSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1052f44d3a6dSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1053f44d3a6dSSatish Balay       v += 25;
1054f44d3a6dSSatish Balay     }
1055f44d3a6dSSatish Balay     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1056f44d3a6dSSatish Balay     z += 5; y += 5;
1057f44d3a6dSSatish Balay   }
1058f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1059f44d3a6dSSatish Balay   ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr);
1060f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1061f44d3a6dSSatish Balay   PLogFlops(50*a->nz);
1062f44d3a6dSSatish Balay   return 0;
1063f44d3a6dSSatish Balay }
1064f44d3a6dSSatish Balay 
1065f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1066f44d3a6dSSatish Balay {
1067f44d3a6dSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1068f44d3a6dSSatish Balay   Scalar          *xg,*zg;
1069f44d3a6dSSatish Balay   register Scalar *x,*z,*v,*xb;
1070f44d3a6dSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1071f44d3a6dSSatish Balay   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1072f44d3a6dSSatish Balay 
1073f44d3a6dSSatish Balay   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1074f44d3a6dSSatish Balay 
1075f44d3a6dSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1076f44d3a6dSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1077f44d3a6dSSatish Balay 
1078f44d3a6dSSatish Balay   idx   = a->j;
1079f44d3a6dSSatish Balay   v     = a->a;
1080f44d3a6dSSatish Balay   ii    = a->i;
1081f44d3a6dSSatish Balay 
1082f44d3a6dSSatish Balay 
1083f44d3a6dSSatish Balay   if (!a->mult_work) {
1084f44d3a6dSSatish Balay     k = PetscMax(a->m,a->n);
1085f44d3a6dSSatish Balay     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1086f44d3a6dSSatish Balay   }
1087f44d3a6dSSatish Balay   work = a->mult_work;
1088f44d3a6dSSatish Balay   for ( i=0; i<mbs; i++ ) {
1089f44d3a6dSSatish Balay     n     = ii[1] - ii[0]; ii++;
1090f44d3a6dSSatish Balay     ncols = n*bs;
1091f44d3a6dSSatish Balay     workt = work;
1092f44d3a6dSSatish Balay     for ( j=0; j<n; j++ ) {
1093f44d3a6dSSatish Balay       xb = x + bs*(*idx++);
1094f44d3a6dSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1095f44d3a6dSSatish Balay       workt += bs;
1096f44d3a6dSSatish Balay     }
1097f44d3a6dSSatish Balay     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1098f44d3a6dSSatish Balay     v += n*bs2;
1099f44d3a6dSSatish Balay     z += bs;
1100f44d3a6dSSatish Balay   }
1101f44d3a6dSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1102f44d3a6dSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1103f44d3a6dSSatish Balay   PLogFlops(2*a->nz*bs2 );
1104f44d3a6dSSatish Balay   return 0;
1105f44d3a6dSSatish Balay }
1106f44d3a6dSSatish Balay 
11071a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1108bb42667fSSatish Balay {
1109bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
11101a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
1111bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1112bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
11139df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1114119a934aSSatish Balay 
1115119a934aSSatish Balay 
1116bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1117bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1118bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
1119bb42667fSSatish Balay 
1120119a934aSSatish Balay   idx   = a->j;
1121119a934aSSatish Balay   v     = a->a;
1122119a934aSSatish Balay   ii    = a->i;
1123119a934aSSatish Balay 
1124119a934aSSatish Balay   switch (bs) {
1125119a934aSSatish Balay   case 1:
1126119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1127119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1128119a934aSSatish Balay       xb = x + i; x1 = xb[0];
1129119a934aSSatish Balay       ib = idx + ai[i];
1130119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1131bb1453f0SSatish Balay         rval    = ib[j];
1132bb1453f0SSatish Balay         z[rval] += *v++ * x1;
1133119a934aSSatish Balay       }
1134119a934aSSatish Balay     }
1135119a934aSSatish Balay     break;
1136119a934aSSatish Balay   case 2:
1137119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1138119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1139119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1140119a934aSSatish Balay       ib = idx + ai[i];
1141119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1142119a934aSSatish Balay         rval      = ib[j]*2;
1143bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1144bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1145119a934aSSatish Balay         v += 4;
1146119a934aSSatish Balay       }
1147119a934aSSatish Balay     }
1148119a934aSSatish Balay     break;
1149119a934aSSatish Balay   case 3:
1150119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1151119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1152119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1153119a934aSSatish Balay       ib = idx + ai[i];
1154119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1155119a934aSSatish Balay         rval      = ib[j]*3;
1156bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1157bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1158bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1159119a934aSSatish Balay         v += 9;
1160119a934aSSatish Balay       }
1161119a934aSSatish Balay     }
1162119a934aSSatish Balay     break;
1163119a934aSSatish Balay   case 4:
1164119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1165119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1166119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1167119a934aSSatish Balay       ib = idx + ai[i];
1168119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1169119a934aSSatish Balay         rval      = ib[j]*4;
1170bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1171bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1172bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1173bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1174119a934aSSatish Balay         v += 16;
1175119a934aSSatish Balay       }
1176119a934aSSatish Balay     }
1177119a934aSSatish Balay     break;
1178119a934aSSatish Balay   case 5:
1179119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
1180119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
1181119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1182119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
1183119a934aSSatish Balay       ib = idx + ai[i];
1184119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
1185119a934aSSatish Balay         rval      = ib[j]*5;
1186bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1187bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1188bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1189bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1190bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1191119a934aSSatish Balay         v += 25;
1192119a934aSSatish Balay       }
1193119a934aSSatish Balay     }
1194119a934aSSatish Balay     break;
1195119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1196119a934aSSatish Balay     default: {
1197119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1198119a934aSSatish Balay       if (!a->mult_work) {
11993b547af2SSatish Balay         k = PetscMax(a->m,a->n);
1200bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1201119a934aSSatish Balay         CHKPTRQ(a->mult_work);
1202119a934aSSatish Balay       }
1203119a934aSSatish Balay       work = a->mult_work;
1204119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
1205119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
1206119a934aSSatish Balay         ncols = n*bs;
1207119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1208119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1209119a934aSSatish Balay         v += n*bs2;
1210119a934aSSatish Balay         x += bs;
1211119a934aSSatish Balay         workt = work;
1212119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
1213119a934aSSatish Balay           zb = z + bs*(*idx++);
1214bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1215119a934aSSatish Balay           workt += bs;
1216119a934aSSatish Balay         }
1217119a934aSSatish Balay       }
1218119a934aSSatish Balay     }
1219119a934aSSatish Balay   }
12200419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
12210419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1222faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2 - a->n);
1223faf6580fSSatish Balay   return 0;
1224faf6580fSSatish Balay }
1225faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1226faf6580fSSatish Balay {
1227faf6580fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1228faf6580fSSatish Balay   Scalar          *xg,*zg,*zb;
1229faf6580fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1230faf6580fSSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1231faf6580fSSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1232faf6580fSSatish Balay 
1233faf6580fSSatish Balay 
1234faf6580fSSatish Balay 
1235faf6580fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1236faf6580fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1237faf6580fSSatish Balay 
1238648c1d95SSatish Balay   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1239648c1d95SSatish Balay   else PetscMemzero(z,N*sizeof(Scalar));
1240648c1d95SSatish Balay 
1241faf6580fSSatish Balay 
1242faf6580fSSatish Balay   idx   = a->j;
1243faf6580fSSatish Balay   v     = a->a;
1244faf6580fSSatish Balay   ii    = a->i;
1245faf6580fSSatish Balay 
1246faf6580fSSatish Balay   switch (bs) {
1247faf6580fSSatish Balay   case 1:
1248faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1249faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1250faf6580fSSatish Balay       xb = x + i; x1 = xb[0];
1251faf6580fSSatish Balay       ib = idx + ai[i];
1252faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1253faf6580fSSatish Balay         rval    = ib[j];
1254faf6580fSSatish Balay         z[rval] += *v++ * x1;
1255faf6580fSSatish Balay       }
1256faf6580fSSatish Balay     }
1257faf6580fSSatish Balay     break;
1258faf6580fSSatish Balay   case 2:
1259faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1260faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1261faf6580fSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1262faf6580fSSatish Balay       ib = idx + ai[i];
1263faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1264faf6580fSSatish Balay         rval      = ib[j]*2;
1265faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
1266faf6580fSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
1267faf6580fSSatish Balay         v += 4;
1268faf6580fSSatish Balay       }
1269faf6580fSSatish Balay     }
1270faf6580fSSatish Balay     break;
1271faf6580fSSatish Balay   case 3:
1272faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1273faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1274faf6580fSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1275faf6580fSSatish Balay       ib = idx + ai[i];
1276faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1277faf6580fSSatish Balay         rval      = ib[j]*3;
1278faf6580fSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1279faf6580fSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1280faf6580fSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1281faf6580fSSatish Balay         v += 9;
1282faf6580fSSatish Balay       }
1283faf6580fSSatish Balay     }
1284faf6580fSSatish Balay     break;
1285faf6580fSSatish Balay   case 4:
1286faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1287faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1288faf6580fSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1289faf6580fSSatish Balay       ib = idx + ai[i];
1290faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1291faf6580fSSatish Balay         rval      = ib[j]*4;
1292faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1293faf6580fSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1294faf6580fSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1295faf6580fSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1296faf6580fSSatish Balay         v += 16;
1297faf6580fSSatish Balay       }
1298faf6580fSSatish Balay     }
1299faf6580fSSatish Balay     break;
1300faf6580fSSatish Balay   case 5:
1301faf6580fSSatish Balay     for ( i=0; i<mbs; i++ ) {
1302faf6580fSSatish Balay       n  = ii[1] - ii[0]; ii++;
1303faf6580fSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1304faf6580fSSatish Balay       x4 = xb[3];   x5 = xb[4];
1305faf6580fSSatish Balay       ib = idx + ai[i];
1306faf6580fSSatish Balay       for ( j=0; j<n; j++ ) {
1307faf6580fSSatish Balay         rval      = ib[j]*5;
1308faf6580fSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1309faf6580fSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1310faf6580fSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1311faf6580fSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1312faf6580fSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1313faf6580fSSatish Balay         v += 25;
1314faf6580fSSatish Balay       }
1315faf6580fSSatish Balay     }
1316faf6580fSSatish Balay     break;
1317faf6580fSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
1318faf6580fSSatish Balay     default: {
1319faf6580fSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1320faf6580fSSatish Balay       if (!a->mult_work) {
1321faf6580fSSatish Balay         k = PetscMax(a->m,a->n);
1322faf6580fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1323faf6580fSSatish Balay         CHKPTRQ(a->mult_work);
1324faf6580fSSatish Balay       }
1325faf6580fSSatish Balay       work = a->mult_work;
1326faf6580fSSatish Balay       for ( i=0; i<mbs; i++ ) {
1327faf6580fSSatish Balay         n     = ii[1] - ii[0]; ii++;
1328faf6580fSSatish Balay         ncols = n*bs;
1329faf6580fSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
1330faf6580fSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1331faf6580fSSatish Balay         v += n*bs2;
1332faf6580fSSatish Balay         x += bs;
1333faf6580fSSatish Balay         workt = work;
1334faf6580fSSatish Balay         for ( j=0; j<n; j++ ) {
1335faf6580fSSatish Balay           zb = z + bs*(*idx++);
1336faf6580fSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1337faf6580fSSatish Balay           workt += bs;
1338faf6580fSSatish Balay         }
1339faf6580fSSatish Balay       }
1340faf6580fSSatish Balay     }
1341faf6580fSSatish Balay   }
1342faf6580fSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1343faf6580fSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1344faf6580fSSatish Balay   PLogFlops(2*a->nz*a->bs2);
13452593348eSBarry Smith   return 0;
13462593348eSBarry Smith }
13472593348eSBarry Smith 
1348de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
13492593348eSBarry Smith {
1350b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13519df24120SSatish Balay   if (nz)  *nz  = a->bs2*a->nz;
1352bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
1353bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
13542593348eSBarry Smith   return 0;
13552593348eSBarry Smith }
13562593348eSBarry Smith 
135791d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
135891d316f6SSatish Balay {
135991d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
136091d316f6SSatish Balay 
136191d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
136291d316f6SSatish Balay 
136391d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
136491d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1365a7c10996SSatish Balay       (a->nz != b->nz)) {
136691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
136791d316f6SSatish Balay   }
136891d316f6SSatish Balay 
136991d316f6SSatish Balay   /* if the a->i are the same */
137091d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
137191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137291d316f6SSatish Balay   }
137391d316f6SSatish Balay 
137491d316f6SSatish Balay   /* if a->j are the same */
137591d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
137691d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
137791d316f6SSatish Balay   }
137891d316f6SSatish Balay 
137991d316f6SSatish Balay   /* if a->a are the same */
138091d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
138191d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
138291d316f6SSatish Balay   }
138391d316f6SSatish Balay   *flg = PETSC_TRUE;
138491d316f6SSatish Balay   return 0;
138591d316f6SSatish Balay 
138691d316f6SSatish Balay }
138791d316f6SSatish Balay 
138891d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
138991d316f6SSatish Balay {
139091d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
13917e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
139217e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
139317e48fc4SSatish Balay 
139417e48fc4SSatish Balay   bs   = a->bs;
139517e48fc4SSatish Balay   aa   = a->a;
139617e48fc4SSatish Balay   ai   = a->i;
139717e48fc4SSatish Balay   aj   = a->j;
139817e48fc4SSatish Balay   ambs = a->mbs;
13999df24120SSatish Balay   bs2  = a->bs2;
140091d316f6SSatish Balay 
140191d316f6SSatish Balay   VecSet(&zero,v);
140291d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
14039867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
140417e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
140517e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
140617e48fc4SSatish Balay       if (aj[j] == i) {
140717e48fc4SSatish Balay         row  = i*bs;
14087e67e3f9SSatish Balay         aa_j = aa+j*bs2;
14097e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
141091d316f6SSatish Balay         break;
141191d316f6SSatish Balay       }
141291d316f6SSatish Balay     }
141391d316f6SSatish Balay   }
141491d316f6SSatish Balay   return 0;
141591d316f6SSatish Balay }
141691d316f6SSatish Balay 
14179867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
14189867e207SSatish Balay {
14199867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14209867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
14217e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
14229867e207SSatish Balay 
14239867e207SSatish Balay   ai  = a->i;
14249867e207SSatish Balay   aj  = a->j;
14259867e207SSatish Balay   aa  = a->a;
14269867e207SSatish Balay   m   = a->m;
14279867e207SSatish Balay   n   = a->n;
14289867e207SSatish Balay   bs  = a->bs;
14299867e207SSatish Balay   mbs = a->mbs;
14309df24120SSatish Balay   bs2 = a->bs2;
14319867e207SSatish Balay   if (ll) {
14329867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
14339867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
14349867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14359867e207SSatish Balay       M  = ai[i+1] - ai[i];
14369867e207SSatish Balay       li = l + i*bs;
14377e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14389867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14397e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
14409867e207SSatish Balay           (*v++) *= li[k%bs];
14419867e207SSatish Balay         }
14429867e207SSatish Balay       }
14439867e207SSatish Balay     }
14449867e207SSatish Balay   }
14459867e207SSatish Balay 
14469867e207SSatish Balay   if (rr) {
14479867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
14489867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
14499867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
14509867e207SSatish Balay       M  = ai[i+1] - ai[i];
14517e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
14529867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
14539867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
14549867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
14559867e207SSatish Balay           x = ri[k];
14569867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
14579867e207SSatish Balay         }
14589867e207SSatish Balay       }
14599867e207SSatish Balay     }
14609867e207SSatish Balay   }
14619867e207SSatish Balay   return 0;
14629867e207SSatish Balay }
14639867e207SSatish Balay 
14649867e207SSatish Balay 
1465b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1466b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
14672a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1468736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1469736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
14701a6a6d98SBarry Smith 
14711a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
14721a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
14731a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
14741a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
14751a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
14761a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
14771a6a6d98SBarry Smith 
14787fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
14797fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
14807fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
14817fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
14827fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
14837fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
14842593348eSBarry Smith 
1485b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
14862593348eSBarry Smith {
1487b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
14882593348eSBarry Smith   Scalar      *v = a->a;
14892593348eSBarry Smith   double      sum = 0.0;
14909df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
14912593348eSBarry Smith 
14922593348eSBarry Smith   if (type == NORM_FROBENIUS) {
14930419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
14942593348eSBarry Smith #if defined(PETSC_COMPLEX)
14952593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
14962593348eSBarry Smith #else
14972593348eSBarry Smith       sum += (*v)*(*v); v++;
14982593348eSBarry Smith #endif
14992593348eSBarry Smith     }
15002593348eSBarry Smith     *norm = sqrt(sum);
15012593348eSBarry Smith   }
15022593348eSBarry Smith   else {
1503b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
15042593348eSBarry Smith   }
15052593348eSBarry Smith   return 0;
15062593348eSBarry Smith }
15072593348eSBarry Smith 
15082593348eSBarry Smith /*
15092593348eSBarry Smith      note: This can only work for identity for row and col. It would
15102593348eSBarry Smith    be good to check this and otherwise generate an error.
15112593348eSBarry Smith */
1512b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
15132593348eSBarry Smith {
1514b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15152593348eSBarry Smith   Mat         outA;
1516de6a44a3SBarry Smith   int         ierr;
15172593348eSBarry Smith 
1518b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
15192593348eSBarry Smith 
15202593348eSBarry Smith   outA          = inA;
15212593348eSBarry Smith   inA->factor   = FACTOR_LU;
15222593348eSBarry Smith   a->row        = row;
15232593348eSBarry Smith   a->col        = col;
15242593348eSBarry Smith 
1525de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
15262593348eSBarry Smith 
15272593348eSBarry Smith   if (!a->diag) {
1528b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
15292593348eSBarry Smith   }
15307fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
15312593348eSBarry Smith   return 0;
15322593348eSBarry Smith }
15332593348eSBarry Smith 
1534b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
15352593348eSBarry Smith {
1536b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
15379df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1538b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1539b6490206SBarry Smith   PLogFlops(totalnz);
15402593348eSBarry Smith   return 0;
15412593348eSBarry Smith }
15422593348eSBarry Smith 
15437e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
15447e67e3f9SSatish Balay {
15457e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
15467e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1547a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
15489df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
15497e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
15507e67e3f9SSatish Balay 
15517e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
15527e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
15537e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
15547e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1555a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
15567e67e3f9SSatish Balay     nrow = ailen[brow];
15577e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
15587e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
15597e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1560a7c10996SSatish Balay       col  = in[l] ;
15617e67e3f9SSatish Balay       bcol = col/bs;
15627e67e3f9SSatish Balay       cidx = col%bs;
15637e67e3f9SSatish Balay       ridx = row%bs;
15647e67e3f9SSatish Balay       high = nrow;
15657e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
15667e67e3f9SSatish Balay       while (high-low > 5) {
15677e67e3f9SSatish Balay         t = (low+high)/2;
15687e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
15697e67e3f9SSatish Balay         else             low  = t;
15707e67e3f9SSatish Balay       }
15717e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
15727e67e3f9SSatish Balay         if (rp[i] > bcol) break;
15737e67e3f9SSatish Balay         if (rp[i] == bcol) {
15747e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
15757e67e3f9SSatish Balay           goto finished;
15767e67e3f9SSatish Balay         }
15777e67e3f9SSatish Balay       }
15787e67e3f9SSatish Balay       *v++ = zero;
15797e67e3f9SSatish Balay       finished:;
15807e67e3f9SSatish Balay     }
15817e67e3f9SSatish Balay   }
15827e67e3f9SSatish Balay   return 0;
15837e67e3f9SSatish Balay }
15847e67e3f9SSatish Balay 
15855a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
15865a838052SSatish Balay {
15875a838052SSatish Balay   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
15885a838052SSatish Balay   *bs = baij->bs;
15895a838052SSatish Balay   return 0;
15905a838052SSatish Balay }
15915a838052SSatish Balay 
1592d9b7c43dSSatish Balay /* idx should be of length atlease bs */
1593d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1594d9b7c43dSSatish Balay {
1595d9b7c43dSSatish Balay   int i,row;
1596d9b7c43dSSatish Balay   row = idx[0];
1597d9b7c43dSSatish Balay   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1598d9b7c43dSSatish Balay 
1599d9b7c43dSSatish Balay   for ( i=1; i<bs; i++ ) {
1600d9b7c43dSSatish Balay     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1601d9b7c43dSSatish Balay   }
1602d9b7c43dSSatish Balay   *flg = PETSC_TRUE;
1603d9b7c43dSSatish Balay   return 0;
1604d9b7c43dSSatish Balay }
1605d9b7c43dSSatish Balay 
1606d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1607d9b7c43dSSatish Balay {
1608d9b7c43dSSatish Balay   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1609d9b7c43dSSatish Balay   IS          is_local;
1610d9b7c43dSSatish Balay   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1611d9b7c43dSSatish Balay   PetscTruth  flg;
1612d9b7c43dSSatish Balay   Scalar      zero = 0.0,*aa;
1613d9b7c43dSSatish Balay 
1614d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
1615d9b7c43dSSatish Balay   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1616d9b7c43dSSatish Balay   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1617d9b7c43dSSatish Balay   ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1618d9b7c43dSSatish Balay   ierr = ISSort(is_local); CHKERRQ(ierr);
1619d9b7c43dSSatish Balay   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1620d9b7c43dSSatish Balay 
1621d9b7c43dSSatish Balay   i = 0;
1622d9b7c43dSSatish Balay   while (i < is_n) {
1623d9b7c43dSSatish Balay     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1624d9b7c43dSSatish Balay     flg = PETSC_FALSE;
1625d9b7c43dSSatish Balay     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1626d9b7c43dSSatish Balay     if (flg) { /* There exists a block of rows to be Zerowed */
1627d9b7c43dSSatish Balay       baij->ilen[rows[i]/bs] = 0;
1628d9b7c43dSSatish Balay       i += bs;
1629d9b7c43dSSatish Balay     } else { /* Zero out only the requested row */
1630d9b7c43dSSatish Balay       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1631d9b7c43dSSatish Balay       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1632d9b7c43dSSatish Balay       for ( j=0; j<count; j++ ) {
1633d9b7c43dSSatish Balay         aa[0] = zero;
1634d9b7c43dSSatish Balay         aa+=bs;
1635d9b7c43dSSatish Balay       }
1636d9b7c43dSSatish Balay       i++;
1637d9b7c43dSSatish Balay     }
1638d9b7c43dSSatish Balay   }
1639d9b7c43dSSatish Balay   if (diag) {
1640d9b7c43dSSatish Balay     for ( j=0; j<is_n; j++ ) {
1641d9b7c43dSSatish Balay       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1642d9b7c43dSSatish Balay     }
1643d9b7c43dSSatish Balay   }
1644d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1645d9b7c43dSSatish Balay   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1646d9b7c43dSSatish Balay   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1647d9b7c43dSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1648d9b7c43dSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1649d9b7c43dSSatish Balay 
1650d9b7c43dSSatish Balay   return 0;
1651d9b7c43dSSatish Balay }
1652ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A)
1653ba4ca20aSSatish Balay {
1654ba4ca20aSSatish Balay   static int called = 0;
1655ba4ca20aSSatish Balay   MPI_Comm   comm = A->comm;
1656ba4ca20aSSatish Balay 
1657ba4ca20aSSatish Balay   if (called) return 0; else called = 1;
1658ba4ca20aSSatish Balay   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1659ba4ca20aSSatish Balay   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1660ba4ca20aSSatish Balay   return 0;
1661ba4ca20aSSatish Balay }
1662d9b7c43dSSatish Balay 
16632593348eSBarry Smith /* -------------------------------------------------------------------*/
1664cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
16659867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1666f44d3a6dSSatish Balay        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1667faf6580fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
16681a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1669b6490206SBarry Smith        0,0,
1670de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1671b6490206SBarry Smith        0,
1672f2501298SSatish Balay        MatTranspose_SeqBAIJ,
167317e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
16749867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1675584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1676b6490206SBarry Smith        0,
1677d9b7c43dSSatish Balay        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1678b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
16797fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1680b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1681de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1682b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1683736121d4SSatish Balay        MatGetSubMatrix_SeqBAIJ,0,
1684b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1685b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1686af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
16877e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1688ba4ca20aSSatish Balay        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
16895a838052SSatish Balay        0,0,0,MatGetBlockSize_SeqBAIJ};
16902593348eSBarry Smith 
16912593348eSBarry Smith /*@C
169244cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
169344cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
169444cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
16952bd5e0b2SLois Curfman McInnes    (or the array nzz).  By setting these parameters accurately, performance
16962bd5e0b2SLois Curfman McInnes    during matrix assembly can be increased by more than a factor of 50.
16972593348eSBarry Smith 
16982593348eSBarry Smith    Input Parameters:
16992593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1700b6490206SBarry Smith .  bs - size of block
17012593348eSBarry Smith .  m - number of rows
17022593348eSBarry Smith .  n - number of columns
1703b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
17042bd5e0b2SLois Curfman McInnes .  nzz - array containing the number of block nonzeros in the various block rows
17052bd5e0b2SLois Curfman McInnes          (possibly different for each block row) or PETSC_NULL
17062593348eSBarry Smith 
17072593348eSBarry Smith    Output Parameter:
17082593348eSBarry Smith .  A - the matrix
17092593348eSBarry Smith 
17102593348eSBarry Smith    Notes:
171144cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
17122593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
171344cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
17142593348eSBarry Smith 
17152593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
17162593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
17172593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
17182593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
17192593348eSBarry Smith 
172044cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
17212593348eSBarry Smith @*/
1722b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
17232593348eSBarry Smith {
17242593348eSBarry Smith   Mat         B;
1725b6490206SBarry Smith   Mat_SeqBAIJ *b;
1726f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1727b6490206SBarry Smith 
1728f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1729f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
17302593348eSBarry Smith 
17312593348eSBarry Smith   *A = 0;
1732b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
17332593348eSBarry Smith   PLogObjectCreate(B);
1734b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
173544cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
17362593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17371a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
17381a6a6d98SBarry Smith   if (!flg) {
17397fc0212eSBarry Smith     switch (bs) {
17407fc0212eSBarry Smith       case 1:
17417fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17421a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
174339b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
1744f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
17457fc0212eSBarry Smith        break;
17464eeb42bcSBarry Smith       case 2:
17474eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
17481a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
174939b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
1750f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
17516d84be18SBarry Smith         break;
1752f327dd97SBarry Smith       case 3:
1753f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
17541a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
175539b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
1756f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
17574eeb42bcSBarry Smith         break;
17586d84be18SBarry Smith       case 4:
17596d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
17601a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
176139b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
1762f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
17636d84be18SBarry Smith         break;
17646d84be18SBarry Smith       case 5:
17656d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
17661a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
176739b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
1768f44d3a6dSSatish Balay         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
17696d84be18SBarry Smith         break;
17707fc0212eSBarry Smith     }
17711a6a6d98SBarry Smith   }
1772b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1773b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
17742593348eSBarry Smith   B->factor           = 0;
17752593348eSBarry Smith   B->lupivotthreshold = 1.0;
17762593348eSBarry Smith   b->row              = 0;
17772593348eSBarry Smith   b->col              = 0;
17782593348eSBarry Smith   b->reallocs         = 0;
17792593348eSBarry Smith 
178044cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
178144cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1782b6490206SBarry Smith   b->mbs     = mbs;
1783f2501298SSatish Balay   b->nbs     = nbs;
1784b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
17852593348eSBarry Smith   if (nnz == PETSC_NULL) {
1786119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
17872593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1788b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1789b6490206SBarry Smith     nz = nz*mbs;
17902593348eSBarry Smith   }
17912593348eSBarry Smith   else {
17922593348eSBarry Smith     nz = 0;
1793b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
17942593348eSBarry Smith   }
17952593348eSBarry Smith 
17962593348eSBarry Smith   /* allocate the matrix space */
17977e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
17982593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
17997e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
18007e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
18012593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
18022593348eSBarry Smith   b->i  = b->j + nz;
18032593348eSBarry Smith   b->singlemalloc = 1;
18042593348eSBarry Smith 
1805de6a44a3SBarry Smith   b->i[0] = 0;
1806b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
18072593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
18082593348eSBarry Smith   }
18092593348eSBarry Smith 
1810b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1811b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1812b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1813b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
18142593348eSBarry Smith 
1815b6490206SBarry Smith   b->bs               = bs;
18169df24120SSatish Balay   b->bs2              = bs2;
1817b6490206SBarry Smith   b->mbs              = mbs;
18182593348eSBarry Smith   b->nz               = 0;
181918e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
18202593348eSBarry Smith   b->sorted           = 0;
18212593348eSBarry Smith   b->roworiented      = 1;
18222593348eSBarry Smith   b->nonew            = 0;
18232593348eSBarry Smith   b->diag             = 0;
18242593348eSBarry Smith   b->solve_work       = 0;
1825de6a44a3SBarry Smith   b->mult_work        = 0;
18262593348eSBarry Smith   b->spptr            = 0;
18272593348eSBarry Smith   *A = B;
18282593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
18292593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
18302593348eSBarry Smith   return 0;
18312593348eSBarry Smith }
18322593348eSBarry Smith 
1833b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
18342593348eSBarry Smith {
18352593348eSBarry Smith   Mat         C;
1836b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
18379df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1838de6a44a3SBarry Smith 
1839de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
18402593348eSBarry Smith 
18412593348eSBarry Smith   *B = 0;
1842b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
18432593348eSBarry Smith   PLogObjectCreate(C);
1844b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
18452593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1846b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1847b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
18482593348eSBarry Smith   C->factor     = A->factor;
18492593348eSBarry Smith   c->row        = 0;
18502593348eSBarry Smith   c->col        = 0;
18512593348eSBarry Smith   C->assembled  = PETSC_TRUE;
18522593348eSBarry Smith 
185344cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
185444cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
185544cd7ae7SLois Curfman McInnes   C->M          = a->m;
185644cd7ae7SLois Curfman McInnes   C->N          = a->n;
185744cd7ae7SLois Curfman McInnes 
1858b6490206SBarry Smith   c->bs         = a->bs;
18599df24120SSatish Balay   c->bs2        = a->bs2;
1860b6490206SBarry Smith   c->mbs        = a->mbs;
1861b6490206SBarry Smith   c->nbs        = a->nbs;
18622593348eSBarry Smith 
1863b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1864b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1865b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
18662593348eSBarry Smith     c->imax[i] = a->imax[i];
18672593348eSBarry Smith     c->ilen[i] = a->ilen[i];
18682593348eSBarry Smith   }
18692593348eSBarry Smith 
18702593348eSBarry Smith   /* allocate the matrix space */
18712593348eSBarry Smith   c->singlemalloc = 1;
18727e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
18732593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
18747e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1875de6a44a3SBarry Smith   c->i  = c->j + nz;
1876b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1877b6490206SBarry Smith   if (mbs > 0) {
1878de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
18792593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
18807e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
18812593348eSBarry Smith     }
18822593348eSBarry Smith   }
18832593348eSBarry Smith 
1884b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
18852593348eSBarry Smith   c->sorted      = a->sorted;
18862593348eSBarry Smith   c->roworiented = a->roworiented;
18872593348eSBarry Smith   c->nonew       = a->nonew;
18882593348eSBarry Smith 
18892593348eSBarry Smith   if (a->diag) {
1890b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1891b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1892b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
18932593348eSBarry Smith       c->diag[i] = a->diag[i];
18942593348eSBarry Smith     }
18952593348eSBarry Smith   }
18962593348eSBarry Smith   else c->diag          = 0;
18972593348eSBarry Smith   c->nz                 = a->nz;
18982593348eSBarry Smith   c->maxnz              = a->maxnz;
18992593348eSBarry Smith   c->solve_work         = 0;
19002593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
19017fc0212eSBarry Smith   c->mult_work          = 0;
19022593348eSBarry Smith   *B = C;
19032593348eSBarry Smith   return 0;
19042593348eSBarry Smith }
19052593348eSBarry Smith 
190619bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
19072593348eSBarry Smith {
1908b6490206SBarry Smith   Mat_SeqBAIJ  *a;
19092593348eSBarry Smith   Mat          B;
1910de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1911b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
191235aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1913a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1914b6490206SBarry Smith   Scalar       *aa;
191519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
19162593348eSBarry Smith 
19171a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1918de6a44a3SBarry Smith   bs2  = bs*bs;
1919b6490206SBarry Smith 
19202593348eSBarry Smith   MPI_Comm_size(comm,&size);
1921b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
192290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
192377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1924de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
19252593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
19262593348eSBarry Smith 
1927b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
192835aab85fSBarry Smith 
192935aab85fSBarry Smith   /*
193035aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
193135aab85fSBarry Smith     divisible by the blocksize
193235aab85fSBarry Smith   */
1933b6490206SBarry Smith   mbs        = M/bs;
193435aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
193535aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
193635aab85fSBarry Smith   else                  mbs++;
193735aab85fSBarry Smith   if (extra_rows) {
19381a6a6d98SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
193935aab85fSBarry Smith   }
1940b6490206SBarry Smith 
19412593348eSBarry Smith   /* read in row lengths */
194235aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
194377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
194435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
19452593348eSBarry Smith 
1946b6490206SBarry Smith   /* read in column indices */
194735aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
194877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
194935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1950b6490206SBarry Smith 
1951b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1952b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1953b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
195435aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
195535aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
195635aab85fSBarry Smith   masked = mask + mbs;
1957b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1958b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
195935aab85fSBarry Smith     nmask = 0;
1960b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1961b6490206SBarry Smith       kmax = rowlengths[rowcount];
1962b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
196335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
196435aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1965b6490206SBarry Smith       }
1966b6490206SBarry Smith       rowcount++;
1967b6490206SBarry Smith     }
196835aab85fSBarry Smith     browlengths[i] += nmask;
196935aab85fSBarry Smith     /* zero out the mask elements we set */
197035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1971b6490206SBarry Smith   }
1972b6490206SBarry Smith 
19732593348eSBarry Smith   /* create our matrix */
197435aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
197535aab85fSBarry Smith          CHKERRQ(ierr);
19762593348eSBarry Smith   B = *A;
1977b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
19782593348eSBarry Smith 
19792593348eSBarry Smith   /* set matrix "i" values */
1980de6a44a3SBarry Smith   a->i[0] = 0;
1981b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1982b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1983b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
19842593348eSBarry Smith   }
1985b6490206SBarry Smith   a->nz         = 0;
1986b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
19872593348eSBarry Smith 
1988b6490206SBarry Smith   /* read in nonzero values */
198935aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
199077c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
199135aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1992b6490206SBarry Smith 
1993b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1994b6490206SBarry Smith   nzcount = 0; jcount = 0;
1995b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1996b6490206SBarry Smith     nzcountb = nzcount;
199735aab85fSBarry Smith     nmask    = 0;
1998b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1999b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2000b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
200135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
200235aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2003b6490206SBarry Smith       }
2004b6490206SBarry Smith       rowcount++;
2005b6490206SBarry Smith     }
2006de6a44a3SBarry Smith     /* sort the masked values */
200777c4ece6SBarry Smith     PetscSortInt(nmask,masked);
2008de6a44a3SBarry Smith 
2009b6490206SBarry Smith     /* set "j" values into matrix */
2010b6490206SBarry Smith     maskcount = 1;
201135aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
201235aab85fSBarry Smith       a->j[jcount++]  = masked[j];
2013de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
2014b6490206SBarry Smith     }
2015b6490206SBarry Smith     /* set "a" values into matrix */
2016de6a44a3SBarry Smith     ishift = bs2*a->i[i];
2017b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
2018b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
2019b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
2020de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
2021de6a44a3SBarry Smith         block  = mask[tmp] - 1;
2022de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
2023de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
2024b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
2025b6490206SBarry Smith       }
2026b6490206SBarry Smith     }
202735aab85fSBarry Smith     /* zero out the mask elements we set */
202835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2029b6490206SBarry Smith   }
203035aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2031b6490206SBarry Smith 
2032b6490206SBarry Smith   PetscFree(rowlengths);
2033b6490206SBarry Smith   PetscFree(browlengths);
2034b6490206SBarry Smith   PetscFree(aa);
2035b6490206SBarry Smith   PetscFree(jj);
2036b6490206SBarry Smith   PetscFree(mask);
2037b6490206SBarry Smith 
2038b6490206SBarry Smith   B->assembled = PETSC_TRUE;
2039de6a44a3SBarry Smith 
2040de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2041de6a44a3SBarry Smith   if (flg) {
204219bcc07fSBarry Smith     Viewer tviewer;
204319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
204490ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
204519bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
204619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2047de6a44a3SBarry Smith   }
2048de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2049de6a44a3SBarry Smith   if (flg) {
205019bcc07fSBarry Smith     Viewer tviewer;
205119bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
205290ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
205319bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
205419bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2055de6a44a3SBarry Smith   }
2056de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2057de6a44a3SBarry Smith   if (flg) {
205819bcc07fSBarry Smith     Viewer tviewer;
205919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
206019bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2062de6a44a3SBarry Smith   }
2063de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2064de6a44a3SBarry Smith   if (flg) {
206519bcc07fSBarry Smith     Viewer tviewer;
206619bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
206790ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
206819bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
206919bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2070de6a44a3SBarry Smith   }
2071de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2072de6a44a3SBarry Smith   if (flg) {
207319bcc07fSBarry Smith     Viewer tviewer;
207419bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
207519bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
207619bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
207719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2078de6a44a3SBarry Smith   }
20792593348eSBarry Smith   return 0;
20802593348eSBarry Smith }
20812593348eSBarry Smith 
20822593348eSBarry Smith 
20832593348eSBarry Smith 
2084