xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 119a934ad54493e8d6de8d4211cf3c366cb9dc12)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*119a934aSSatish Balay static char vcid[] = "$Id: baij.c,v 1.30 1996/04/07 22:45:39 curfman Exp balay $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
112593348eSBarry Smith #include "vec/vecimpl.h"
122593348eSBarry Smith #include "inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
142593348eSBarry Smith 
15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41bcd2baecSBarry Smith   ishift = a->indexshift;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
44bcd2baecSBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45bcd2baecSBarry Smith     CHKERRQ(ierr);
46bcd2baecSBarry Smith     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
472593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
48bcd2baecSBarry Smith   } else {
49bcd2baecSBarry Smith     if (ishift == oshift) {
50bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51bcd2baecSBarry Smith     }
52bcd2baecSBarry Smith     else if (ishift == -1) {
53bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
54bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
55bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
56bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
59bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60bcd2baecSBarry Smith     } else {
61bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
62bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
63bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
64bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
67bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68bcd2baecSBarry Smith     }
69bcd2baecSBarry Smith   }
702593348eSBarry Smith   return 0;
712593348eSBarry Smith }
722593348eSBarry Smith 
73de6a44a3SBarry Smith /*
74de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
75de6a44a3SBarry Smith */
76de6a44a3SBarry Smith 
77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
78de6a44a3SBarry Smith {
79de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
807fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
81de6a44a3SBarry Smith 
82de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
847fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
85de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86de6a44a3SBarry Smith       if (a->j[j] == i) {
87de6a44a3SBarry Smith         diag[i] = j;
88de6a44a3SBarry Smith         break;
89de6a44a3SBarry Smith       }
90de6a44a3SBarry Smith     }
91de6a44a3SBarry Smith   }
92de6a44a3SBarry Smith   a->diag = diag;
93de6a44a3SBarry Smith   return 0;
94de6a44a3SBarry Smith }
952593348eSBarry Smith 
962593348eSBarry Smith #include "draw.h"
972593348eSBarry Smith #include "pinclude/pviewer.h"
9877c4ece6SBarry Smith #include "sys.h"
992593348eSBarry Smith 
100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1012593348eSBarry Smith {
102b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1037e67e3f9SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104b6490206SBarry Smith   Scalar      *aa;
1052593348eSBarry Smith 
10690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1072593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1082593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1092593348eSBarry Smith   col_lens[1] = a->m;
1102593348eSBarry Smith   col_lens[2] = a->n;
1117e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1122593348eSBarry Smith 
1132593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
114b6490206SBarry Smith   count = 0;
115b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
116b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
117b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118b6490206SBarry Smith     }
1192593348eSBarry Smith   }
12077c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1212593348eSBarry Smith   PetscFree(col_lens);
1222593348eSBarry Smith 
1232593348eSBarry Smith   /* store column indices (zero start index) */
1247e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125b6490206SBarry Smith   count = 0;
126b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
127b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
128b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
130b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1312593348eSBarry Smith         }
1322593348eSBarry Smith       }
133b6490206SBarry Smith     }
134b6490206SBarry Smith   }
1357e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136b6490206SBarry Smith   PetscFree(jj);
1372593348eSBarry Smith 
1382593348eSBarry Smith   /* store nonzero values */
1397e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140b6490206SBarry Smith   count = 0;
141b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
142b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
143b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1457e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
146b6490206SBarry Smith         }
147b6490206SBarry Smith       }
148b6490206SBarry Smith     }
149b6490206SBarry Smith   }
1507e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151b6490206SBarry Smith   PetscFree(aa);
1522593348eSBarry Smith   return 0;
1532593348eSBarry Smith }
1542593348eSBarry Smith 
155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1562593348eSBarry Smith {
157b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1587e67e3f9SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
1592593348eSBarry Smith   FILE        *fd;
1602593348eSBarry Smith   char        *outputname;
1612593348eSBarry Smith 
16290ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1632593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16490ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
16590ace30eSBarry Smith   if (format == ASCII_FORMAT_INFO) {
1662593348eSBarry Smith     /* no need to print additional information */ ;
1672593348eSBarry Smith   }
16890ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
169b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1702593348eSBarry Smith   }
17144cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17244cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17344cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17444cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17544cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17644cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17744cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17844cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
18044cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18344cd7ae7SLois Curfman McInnes #else
18444cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18544cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18644cd7ae7SLois Curfman McInnes #endif
18744cd7ae7SLois Curfman McInnes           }
18844cd7ae7SLois Curfman McInnes         }
18944cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19044cd7ae7SLois Curfman McInnes       }
19144cd7ae7SLois Curfman McInnes     }
19244cd7ae7SLois Curfman McInnes   }
1932593348eSBarry Smith   else {
194b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
195b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
196b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
197b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
2007e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20188685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2027e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes           else {
2057e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20688685aaeSLois Curfman McInnes           }
20788685aaeSLois Curfman McInnes #else
2087e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20988685aaeSLois Curfman McInnes #endif
2102593348eSBarry Smith           }
2112593348eSBarry Smith         }
2122593348eSBarry Smith         fprintf(fd,"\n");
2132593348eSBarry Smith       }
2142593348eSBarry Smith     }
215b6490206SBarry Smith   }
2162593348eSBarry Smith   fflush(fd);
2172593348eSBarry Smith   return 0;
2182593348eSBarry Smith }
2192593348eSBarry Smith 
220b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
2212593348eSBarry Smith {
2222593348eSBarry Smith   Mat         A = (Mat) obj;
22319bcc07fSBarry Smith   ViewerType  vtype;
22419bcc07fSBarry Smith   int         ierr;
2252593348eSBarry Smith 
2262593348eSBarry Smith   if (!viewer) {
22719bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2282593348eSBarry Smith   }
22919bcc07fSBarry Smith 
23019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
23119bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
232b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2332593348eSBarry Smith   }
23419bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2362593348eSBarry Smith   }
23719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
238b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2392593348eSBarry Smith   }
24019bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
241b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2422593348eSBarry Smith   }
2432593348eSBarry Smith   return 0;
2442593348eSBarry Smith }
245b6490206SBarry Smith 
246*119a934aSSatish Balay #define CHUNKSIZE  10
247cd0e1443SSatish Balay 
248cd0e1443SSatish Balay /* This version has row oriented v  */
249cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250cd0e1443SSatish Balay {
251cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254cd0e1443SSatish Balay   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
2557e67e3f9SSatish Balay   int          ridx,cidx,bs2=bs*bs;
256cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
257cd0e1443SSatish Balay 
258cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
259cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
260cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
2627e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
263cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
264cd0e1443SSatish Balay     low = 0;
265cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
266cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268cd0e1443SSatish Balay       col = in[l] - shift; bcol = col/bs;
269cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
270cd0e1443SSatish Balay       if (roworiented) {
271cd0e1443SSatish Balay         value = *v++;
272cd0e1443SSatish Balay       }
273cd0e1443SSatish Balay       else {
274cd0e1443SSatish Balay         value = v[k + l*m];
275cd0e1443SSatish Balay       }
276cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
277cd0e1443SSatish Balay       while (high-low > 5) {
278cd0e1443SSatish Balay         t = (low+high)/2;
279cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
280cd0e1443SSatish Balay         else             low  = t;
281cd0e1443SSatish Balay       }
282cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
283cd0e1443SSatish Balay         if (rp[i] > bcol) break;
284cd0e1443SSatish Balay         if (rp[i] == bcol) {
2857e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
286cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
287cd0e1443SSatish Balay           else                  *bap  = value;
288cd0e1443SSatish Balay           goto noinsert;
289cd0e1443SSatish Balay         }
290cd0e1443SSatish Balay       }
291cd0e1443SSatish Balay       if (nonew) goto noinsert;
292cd0e1443SSatish Balay       if (nrow >= rmax) {
293cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
294cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295cd0e1443SSatish Balay         Scalar *new_a;
296cd0e1443SSatish Balay 
297cd0e1443SSatish Balay         /* malloc new storage space */
2987e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
3007e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
301cd0e1443SSatish Balay         new_i   = new_j + new_nz;
302cd0e1443SSatish Balay 
303cd0e1443SSatish Balay         /* copy over old data into new slots */
304cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
3057e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
3067e67e3f9SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
307cd0e1443SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
308cd0e1443SSatish Balay         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
309cd0e1443SSatish Balay                                                            len*sizeof(int));
3107e67e3f9SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
3117e67e3f9SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
3127e67e3f9SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
3137e67e3f9SSatish Balay                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
314cd0e1443SSatish Balay         /* free up old matrix storage */
315cd0e1443SSatish Balay         PetscFree(a->a);
316cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318cd0e1443SSatish Balay         a->singlemalloc = 1;
319cd0e1443SSatish Balay 
3207e67e3f9SSatish Balay         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
321cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3227e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323*119a934aSSatish Balay         a->maxnz += CHUNKSIZE;
324cd0e1443SSatish Balay         a->reallocs++;
325*119a934aSSatish Balay         a->nz++;
326cd0e1443SSatish Balay       }
3277e67e3f9SSatish Balay       N = nrow++ - 1;
328cd0e1443SSatish Balay       /* shift up all the later entries in this row */
329cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
330cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3317e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332cd0e1443SSatish Balay       }
3337e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334cd0e1443SSatish Balay       rp[i] = bcol;
3357e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
336cd0e1443SSatish Balay       noinsert:;
337cd0e1443SSatish Balay       low = i;
338cd0e1443SSatish Balay     }
339cd0e1443SSatish Balay     ailen[brow] = nrow;
340cd0e1443SSatish Balay   }
341cd0e1443SSatish Balay   return 0;
342cd0e1443SSatish Balay }
343cd0e1443SSatish Balay 
3440b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3450b824a48SSatish Balay {
3460b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3470b824a48SSatish Balay   *m = a->m; *n = a->n;
3480b824a48SSatish Balay   return 0;
3490b824a48SSatish Balay }
3500b824a48SSatish Balay 
3510b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3520b824a48SSatish Balay {
3530b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3540b824a48SSatish Balay   *m = 0; *n = a->m;
3550b824a48SSatish Balay   return 0;
3560b824a48SSatish Balay }
3570b824a48SSatish Balay 
3589867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3599867e207SSatish Balay {
3609867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3617e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3629867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3639867e207SSatish Balay 
3649867e207SSatish Balay   bs  = a->bs;
3659867e207SSatish Balay   ai  = a->i;
3669867e207SSatish Balay   aj  = a->j;
3679867e207SSatish Balay   aa  = a->a;
3687e67e3f9SSatish Balay   bs2 = bs*bs;
3699867e207SSatish Balay 
3709867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3719867e207SSatish Balay 
3729867e207SSatish Balay   bn  = row/bs;   /* Block number */
3739867e207SSatish Balay   bp  = row % bs; /* Block Position */
3749867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3759867e207SSatish Balay   *nz = bs*M;
3769867e207SSatish Balay 
3779867e207SSatish Balay   if (v) {
3789867e207SSatish Balay     *v = 0;
3799867e207SSatish Balay     if (*nz) {
3809867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3819867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3829867e207SSatish Balay         v_i  = *v + i*bs;
3837e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3847e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3859867e207SSatish Balay       }
3869867e207SSatish Balay     }
3879867e207SSatish Balay   }
3889867e207SSatish Balay 
3899867e207SSatish Balay   if (idx) {
3909867e207SSatish Balay     *idx = 0;
3919867e207SSatish Balay     if (*nz) {
3929867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3939867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3949867e207SSatish Balay         idx_i = *idx + i*bs;
3959867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3969867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3979867e207SSatish Balay       }
3989867e207SSatish Balay     }
3999867e207SSatish Balay   }
4009867e207SSatish Balay   return 0;
4019867e207SSatish Balay }
4029867e207SSatish Balay 
4039867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4049867e207SSatish Balay {
4059867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
4069867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
4079867e207SSatish Balay   return 0;
4089867e207SSatish Balay }
409b6490206SBarry Smith 
410f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411f2501298SSatish Balay {
412f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413f2501298SSatish Balay   Mat         C;
414f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415f2501298SSatish Balay   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
416f2501298SSatish Balay   Scalar      *array=a->a;
417f2501298SSatish Balay 
418f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
419f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
422f2501298SSatish Balay   if (shift) {
423f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
424f2501298SSatish Balay   }
425f2501298SSatish Balay   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
426f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
427f2501298SSatish Balay   PetscFree(col);
428f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
429f2501298SSatish Balay   cols = rows + bs;
430f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
431f2501298SSatish Balay     cols[0] = i*bs;
432f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
433f2501298SSatish Balay     len = ai[i+1] - ai[i];
434f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
435f2501298SSatish Balay       rows[0] = (*aj++)*bs;
436f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
437f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
438f2501298SSatish Balay       array += bs2;
439f2501298SSatish Balay     }
440f2501298SSatish Balay   }
4411073c447SSatish Balay   PetscFree(rows);
442f2501298SSatish Balay   if (shift) {
443f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
444f2501298SSatish Balay   }
445f2501298SSatish Balay 
446f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
447f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
448f2501298SSatish Balay 
449f2501298SSatish Balay   if (B != PETSC_NULL) {
450f2501298SSatish Balay     *B = C;
451f2501298SSatish Balay   } else {
452f2501298SSatish Balay     /* This isn't really an in-place transpose */
453f2501298SSatish Balay     PetscFree(a->a);
454f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
455f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
456f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
457f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
458f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
459f2501298SSatish Balay     PetscFree(a);
460f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
461f2501298SSatish Balay     PetscHeaderDestroy(C);
462f2501298SSatish Balay   }
463f2501298SSatish Balay   return 0;
464f2501298SSatish Balay }
465f2501298SSatish Balay 
466f2501298SSatish Balay 
467584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
468584200bdSSatish Balay {
469584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
470584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
471584200bdSSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift;
472584200bdSSatish Balay   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
473584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
474584200bdSSatish Balay 
475584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
476584200bdSSatish Balay 
477584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
478584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
479584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
480584200bdSSatish Balay     if (fshift) {
481584200bdSSatish Balay       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
482584200bdSSatish Balay       N = ailen[i];
483584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
484584200bdSSatish Balay         ip[j-fshift] = ip[j];
4857e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
486584200bdSSatish Balay       }
487584200bdSSatish Balay     }
488584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
489584200bdSSatish Balay   }
490584200bdSSatish Balay   if (mbs) {
491584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
492584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
493584200bdSSatish Balay   }
494584200bdSSatish Balay   /* reset ilen and imax for each row */
495584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
496584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
497584200bdSSatish Balay   }
498*119a934aSSatish Balay   a->nz = ai[mbs] + shift;
499584200bdSSatish Balay 
500584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
501584200bdSSatish Balay   if (fshift && a->diag) {
502584200bdSSatish Balay     PetscFree(a->diag);
503584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
504584200bdSSatish Balay     a->diag = 0;
505584200bdSSatish Balay   }
506584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
507584200bdSSatish Balay            fshift,a->nz,m);
508584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
509584200bdSSatish Balay            a->reallocs);
510584200bdSSatish Balay   return 0;
511584200bdSSatish Balay }
512584200bdSSatish Balay 
513b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
5142593348eSBarry Smith {
515b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
516de6a44a3SBarry Smith   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
5172593348eSBarry Smith   return 0;
5182593348eSBarry Smith }
5192593348eSBarry Smith 
520b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
5212593348eSBarry Smith {
5222593348eSBarry Smith   Mat         A  = (Mat) obj;
523b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5242593348eSBarry Smith 
5252593348eSBarry Smith #if defined(PETSC_LOG)
5262593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5272593348eSBarry Smith #endif
5282593348eSBarry Smith   PetscFree(a->a);
5292593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5302593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5312593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5322593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5332593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
534de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5352593348eSBarry Smith   PetscFree(a);
536f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
537f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5382593348eSBarry Smith   return 0;
5392593348eSBarry Smith }
5402593348eSBarry Smith 
541b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5422593348eSBarry Smith {
543b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5442593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5452593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5462593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5472593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5482593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5492593348eSBarry Smith   else if (op == ROWS_SORTED ||
5502593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5512593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5522593348eSBarry Smith            op == YES_NEW_DIAGONALS)
55394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5542593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
555b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5562593348eSBarry Smith   else
557b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5582593348eSBarry Smith   return 0;
5592593348eSBarry Smith }
5602593348eSBarry Smith 
5612593348eSBarry Smith 
5622593348eSBarry Smith /* -------------------------------------------------------*/
5632593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5642593348eSBarry Smith /* -------------------------------------------------------*/
565b6490206SBarry Smith #include "pinclude/plapack.h"
566b6490206SBarry Smith 
567b6490206SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
5682593348eSBarry Smith {
569b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
570b6490206SBarry Smith   Scalar          *xg,*yg;
571b6490206SBarry Smith   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
572b6490206SBarry Smith   register Scalar x1,x2,x3,x4,x5;
573b6490206SBarry Smith   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
574b6490206SBarry Smith   int             bs = a->bs,j,n,bs2 = bs*bs;
5752593348eSBarry Smith 
576b6490206SBarry Smith   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
577b6490206SBarry Smith   PetscMemzero(y,m*sizeof(Scalar));
578*119a934aSSatish Balay   /*
579*119a934aSSatish Balay x     = x; */
5802593348eSBarry Smith   idx   = a->j;
5812593348eSBarry Smith   v     = a->a;
5822593348eSBarry Smith   ii    = a->i;
583b6490206SBarry Smith 
584b6490206SBarry Smith   switch (bs) {
585b6490206SBarry Smith     case 1:
586*119a934aSSatish Balay      for ( i=0; i<mbs; i++ ) {
5872593348eSBarry Smith        n    = ii[1] - ii[0]; ii++;
5882593348eSBarry Smith        sum  = 0.0;
5892593348eSBarry Smith        while (n--) sum += *v++ * x[*idx++];
5902593348eSBarry Smith        y[i] = sum;
5912593348eSBarry Smith       }
5922593348eSBarry Smith       break;
593b6490206SBarry Smith     case 2:
594b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
595de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
596b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0;
597b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
598b6490206SBarry Smith           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
599b6490206SBarry Smith           sum1 += v[0]*x1 + v[2]*x2;
600b6490206SBarry Smith           sum2 += v[1]*x1 + v[3]*x2;
601b6490206SBarry Smith           v += 4;
602b6490206SBarry Smith         }
603*119a934aSSatish Balay         y[0] = sum1; y[1] = sum2;
604b6490206SBarry Smith         y += 2;
605b6490206SBarry Smith       }
606b6490206SBarry Smith       break;
607b6490206SBarry Smith     case 3:
608b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
609de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
610b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
611b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
612b6490206SBarry Smith           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
613b6490206SBarry Smith           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
614b6490206SBarry Smith           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
615b6490206SBarry Smith           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
616b6490206SBarry Smith           v += 9;
617b6490206SBarry Smith         }
618*119a934aSSatish Balay         y[0] = sum1; y[1] = sum2; y[2] = sum3;
619b6490206SBarry Smith         y += 3;
620b6490206SBarry Smith       }
621b6490206SBarry Smith       break;
622b6490206SBarry Smith     case 4:
623b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
624de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
625b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
626b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
627b6490206SBarry Smith           xb = x + 4*(*idx++);
628b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
629b6490206SBarry Smith           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
630b6490206SBarry Smith           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
631b6490206SBarry Smith           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
632b6490206SBarry Smith           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
633b6490206SBarry Smith           v += 16;
634b6490206SBarry Smith         }
635*119a934aSSatish Balay         y[0] = sum1; y[1] = sum2; y[2] = sum3; y[3] = sum4;
636b6490206SBarry Smith         y += 4;
637b6490206SBarry Smith       }
638b6490206SBarry Smith       break;
639b6490206SBarry Smith     case 5:
640b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
641de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
642b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
643b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
644b6490206SBarry Smith           xb = x + 5*(*idx++);
645b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
646b6490206SBarry Smith           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
647b6490206SBarry Smith           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
648b6490206SBarry Smith           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
649b6490206SBarry Smith           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
650b6490206SBarry Smith           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
651b6490206SBarry Smith           v += 25;
652b6490206SBarry Smith         }
653*119a934aSSatish Balay         y[0] = sum1; y[1] = sum2; y[2] = sum3; y[3] = sum4; y[4] = sum5;
654b6490206SBarry Smith         y += 5;
655b6490206SBarry Smith       }
656b6490206SBarry Smith       break;
657b6490206SBarry Smith       /* block sizes larger then 5 by 5 are handled by BLAS */
658b6490206SBarry Smith     default: {
659de6a44a3SBarry Smith       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
660de6a44a3SBarry Smith       if (!a->mult_work) {
661de6a44a3SBarry Smith         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
662de6a44a3SBarry Smith         CHKPTRQ(a->mult_work);
663de6a44a3SBarry Smith       }
664de6a44a3SBarry Smith       work = a->mult_work;
665b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
666de6a44a3SBarry Smith         n     = ii[1] - ii[0]; ii++;
667de6a44a3SBarry Smith         ncols = n*bs;
668de6a44a3SBarry Smith         workt = work;
669b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
670b6490206SBarry Smith           xb = x + bs*(*idx++);
671de6a44a3SBarry Smith           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
672de6a44a3SBarry Smith           workt += bs;
673b6490206SBarry Smith         }
674de6a44a3SBarry Smith         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
675de6a44a3SBarry Smith         v += n*bs2;
676b6490206SBarry Smith         y += bs;
6772593348eSBarry Smith       }
6782593348eSBarry Smith     }
6792593348eSBarry Smith   }
680*119a934aSSatish Balay   PLogFlops(2*bs2* (a->nz) - m);
681*119a934aSSatish Balay   return 0;
682*119a934aSSatish Balay }
683*119a934aSSatish Balay 
684*119a934aSSatish Balay static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
685*119a934aSSatish Balay {
686*119a934aSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
687*119a934aSSatish Balay   Scalar          *xg,*yg,*zg;
688*119a934aSSatish Balay   register Scalar *x,*y,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
689*119a934aSSatish Balay   register Scalar x1,x2,x3,x4,x5;
690*119a934aSSatish Balay   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
691*119a934aSSatish Balay   int             bs=a->bs,j,n,bs2=bs*bs;
692*119a934aSSatish Balay 
693*119a934aSSatish Balay   VecGetArray(xx,&xg); x = xg;
694*119a934aSSatish Balay   VecGetArray(yy,&yg); y = yg;
695*119a934aSSatish Balay   VecGetArray(zz,&zg); z = zg;
696*119a934aSSatish Balay 
697*119a934aSSatish Balay   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar));
698*119a934aSSatish Balay   else if (zz!=yy) PetscMemcpy(z,y,m*sizeof(Scalar));
699*119a934aSSatish Balay 
700*119a934aSSatish Balay   idx   = a->j;
701*119a934aSSatish Balay   v     = a->a;
702*119a934aSSatish Balay   ii    = a->i;
703*119a934aSSatish Balay 
704*119a934aSSatish Balay   switch (bs) {
705*119a934aSSatish Balay   case 1:
706*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
707*119a934aSSatish Balay       n    = ii[1] - ii[0]; ii++;
708*119a934aSSatish Balay       sum  = 0.0;
709*119a934aSSatish Balay       while (n--) sum += *v++ * x[*idx++];
710*119a934aSSatish Balay       z[i] += sum;
711*119a934aSSatish Balay     }
712*119a934aSSatish Balay     break;
713*119a934aSSatish Balay   case 2:
714*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
715*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
716*119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0;
717*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
718*119a934aSSatish Balay         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
719*119a934aSSatish Balay         sum1 += v[0]*x1 + v[2]*x2;
720*119a934aSSatish Balay         sum2 += v[1]*x1 + v[3]*x2;
721*119a934aSSatish Balay         v += 4;
722*119a934aSSatish Balay       }
723*119a934aSSatish Balay       z[0] += sum1; z[1] += sum2;
724*119a934aSSatish Balay       z += 2;
725*119a934aSSatish Balay     }
726*119a934aSSatish Balay     break;
727*119a934aSSatish Balay   case 3:
728*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
729*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
730*119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
731*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
732*119a934aSSatish Balay         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
733*119a934aSSatish Balay         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
734*119a934aSSatish Balay         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
735*119a934aSSatish Balay         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
736*119a934aSSatish Balay         v += 9;
737*119a934aSSatish Balay       }
738*119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3;
739*119a934aSSatish Balay       z += 3;
740*119a934aSSatish Balay     }
741*119a934aSSatish Balay     break;
742*119a934aSSatish Balay   case 4:
743*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
744*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
745*119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
746*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
747*119a934aSSatish Balay         xb = x + 4*(*idx++);
748*119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
749*119a934aSSatish Balay         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
750*119a934aSSatish Balay         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
751*119a934aSSatish Balay         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
752*119a934aSSatish Balay         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
753*119a934aSSatish Balay         v += 16;
754*119a934aSSatish Balay       }
755*119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
756*119a934aSSatish Balay       z += 4;
757*119a934aSSatish Balay     }
758*119a934aSSatish Balay     break;
759*119a934aSSatish Balay   case 5:
760*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
761*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
762*119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
763*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
764*119a934aSSatish Balay         xb = x + 5*(*idx++);
765*119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
766*119a934aSSatish Balay         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
767*119a934aSSatish Balay         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
768*119a934aSSatish Balay         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
769*119a934aSSatish Balay         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
770*119a934aSSatish Balay         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
771*119a934aSSatish Balay         v += 25;
772*119a934aSSatish Balay       }
773*119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
774*119a934aSSatish Balay       z += 5;
775*119a934aSSatish Balay     }
776*119a934aSSatish Balay     break;
777*119a934aSSatish Balay     /* block sizes larger then 5 by 5 are handled by BLAS */
778*119a934aSSatish Balay   default: {
779*119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
780*119a934aSSatish Balay       if (!a->mult_work) {
781*119a934aSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
782*119a934aSSatish Balay         CHKPTRQ(a->mult_work);
783*119a934aSSatish Balay       }
784*119a934aSSatish Balay       work = a->mult_work;
785*119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
786*119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
787*119a934aSSatish Balay         ncols = n*bs;
788*119a934aSSatish Balay         workt = work;
789*119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
790*119a934aSSatish Balay           xb = x + bs*(*idx++);
791*119a934aSSatish Balay           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
792*119a934aSSatish Balay           workt += bs;
793*119a934aSSatish Balay         }
794*119a934aSSatish Balay         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
795*119a934aSSatish Balay         v += n*bs2;
796*119a934aSSatish Balay         z += bs;
797*119a934aSSatish Balay       }
798*119a934aSSatish Balay     }
799*119a934aSSatish Balay   }
800*119a934aSSatish Balay   PLogFlops(2*bs2*(a->nz));
801*119a934aSSatish Balay   return 0;
802*119a934aSSatish Balay }
803*119a934aSSatish Balay 
804*119a934aSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
805*119a934aSSatish Balay {
806*119a934aSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
807*119a934aSSatish Balay   Scalar          *xg,*yg,*zg,*zb;
808*119a934aSSatish Balay   register Scalar *x,*y,*z,*v,*xb,x1,x2,x3,x4,x5;
809*119a934aSSatish Balay   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
810*119a934aSSatish Balay   int             bs=a->bs,j,n,bs2=bs*bs,*ib;
811*119a934aSSatish Balay 
812*119a934aSSatish Balay   VecGetArray(xx,&xg); x = xg;
813*119a934aSSatish Balay   VecGetArray(yy,&yg); y = yg;
814*119a934aSSatish Balay   VecGetArray(zz,&zg); z = zg;
815*119a934aSSatish Balay 
816*119a934aSSatish Balay   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar));
817*119a934aSSatish Balay   else if (zz!=yy) PetscMemcpy(z,y,N*sizeof(Scalar));
818*119a934aSSatish Balay 
819*119a934aSSatish Balay   idx   = a->j;
820*119a934aSSatish Balay   v     = a->a;
821*119a934aSSatish Balay   ii    = a->i;
822*119a934aSSatish Balay 
823*119a934aSSatish Balay   switch (bs) {
824*119a934aSSatish Balay   case 1:
825*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
826*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
827*119a934aSSatish Balay       xb = x + i; x1 = xb[0];
828*119a934aSSatish Balay       ib = idx + ai[i];
829*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
830*119a934aSSatish Balay         z[ib[j]] += *v++ * x1;
831*119a934aSSatish Balay       }
832*119a934aSSatish Balay     }
833*119a934aSSatish Balay     break;
834*119a934aSSatish Balay   case 2:
835*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
836*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
837*119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
838*119a934aSSatish Balay       ib = idx + ai[i];
839*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
840*119a934aSSatish Balay         rval = ib[j]*2;
841*119a934aSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
842*119a934aSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
843*119a934aSSatish Balay         v += 4;
844*119a934aSSatish Balay       }
845*119a934aSSatish Balay     }
846*119a934aSSatish Balay     break;
847*119a934aSSatish Balay   case 3:
848*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
849*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
850*119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
851*119a934aSSatish Balay       ib = idx + ai[i];
852*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
853*119a934aSSatish Balay         rval = ib[j]*3;
854*119a934aSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
855*119a934aSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
856*119a934aSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
857*119a934aSSatish Balay         v += 9;
858*119a934aSSatish Balay       }
859*119a934aSSatish Balay     }
860*119a934aSSatish Balay     break;
861*119a934aSSatish Balay   case 4:
862*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
863*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
864*119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
865*119a934aSSatish Balay       ib = idx + ai[i];
866*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
867*119a934aSSatish Balay         rval = ib[j]*4;
868*119a934aSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
869*119a934aSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
870*119a934aSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
871*119a934aSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
872*119a934aSSatish Balay         v += 16;
873*119a934aSSatish Balay       }
874*119a934aSSatish Balay     }
875*119a934aSSatish Balay     break;
876*119a934aSSatish Balay   case 5:
877*119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
878*119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
879*119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
880*119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
881*119a934aSSatish Balay       ib = idx + ai[i];
882*119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
883*119a934aSSatish Balay         rval = ib[j]*5;
884*119a934aSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
885*119a934aSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
886*119a934aSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
887*119a934aSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
888*119a934aSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
889*119a934aSSatish Balay         v += 25;
890*119a934aSSatish Balay       }
891*119a934aSSatish Balay     }
892*119a934aSSatish Balay     break;
893*119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
894*119a934aSSatish Balay     default: {
895*119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
896*119a934aSSatish Balay       if (!a->mult_work) {
897*119a934aSSatish Balay         /* must be max of m,n */
898*119a934aSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
899*119a934aSSatish Balay         CHKPTRQ(a->mult_work);
900*119a934aSSatish Balay       }
901*119a934aSSatish Balay       work = a->mult_work;
902*119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
903*119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
904*119a934aSSatish Balay         ncols = n*bs;
905*119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
906*119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
907*119a934aSSatish Balay         v += n*bs2;
908*119a934aSSatish Balay         x += bs;
909*119a934aSSatish Balay         workt = work;
910*119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
911*119a934aSSatish Balay           zb = z + bs*(*idx++);
912*119a934aSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
913*119a934aSSatish Balay           workt += bs;
914*119a934aSSatish Balay         }
915*119a934aSSatish Balay       }
916*119a934aSSatish Balay     }
917*119a934aSSatish Balay   }
918*119a934aSSatish Balay   PLogFlops(2*bs2*(a->nz));
9192593348eSBarry Smith   return 0;
9202593348eSBarry Smith }
9212593348eSBarry Smith 
922de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
9232593348eSBarry Smith {
924b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
925bcd2baecSBarry Smith   if (nz)  *nz  = a->bs*a->bs*a->nz;
926bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
927bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
9282593348eSBarry Smith   return 0;
9292593348eSBarry Smith }
9302593348eSBarry Smith 
93191d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
93291d316f6SSatish Balay {
93391d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
93491d316f6SSatish Balay 
93591d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
93691d316f6SSatish Balay 
93791d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
93891d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
93991d316f6SSatish Balay       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
94091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
94191d316f6SSatish Balay   }
94291d316f6SSatish Balay 
94391d316f6SSatish Balay   /* if the a->i are the same */
94491d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
94591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
94691d316f6SSatish Balay   }
94791d316f6SSatish Balay 
94891d316f6SSatish Balay   /* if a->j are the same */
94991d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
95091d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
95191d316f6SSatish Balay   }
95291d316f6SSatish Balay 
95391d316f6SSatish Balay   /* if a->a are the same */
95491d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
95591d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
95691d316f6SSatish Balay   }
95791d316f6SSatish Balay   *flg = PETSC_TRUE;
95891d316f6SSatish Balay   return 0;
95991d316f6SSatish Balay 
96091d316f6SSatish Balay }
96191d316f6SSatish Balay 
96291d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
96391d316f6SSatish Balay {
96491d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9657e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
96617e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
96717e48fc4SSatish Balay 
96817e48fc4SSatish Balay   bs   = a->bs;
96917e48fc4SSatish Balay   aa   = a->a;
97017e48fc4SSatish Balay   ai   = a->i;
97117e48fc4SSatish Balay   aj   = a->j;
97217e48fc4SSatish Balay   ambs = a->mbs;
9737e67e3f9SSatish Balay   bs2  = bs*bs;
97491d316f6SSatish Balay 
97591d316f6SSatish Balay   VecSet(&zero,v);
97691d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
9779867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
97817e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
97917e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
98017e48fc4SSatish Balay       if (aj[j] == i) {
98117e48fc4SSatish Balay         row  = i*bs;
9827e67e3f9SSatish Balay         aa_j = aa+j*bs2;
9837e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
98491d316f6SSatish Balay         break;
98591d316f6SSatish Balay       }
98691d316f6SSatish Balay     }
98791d316f6SSatish Balay   }
98891d316f6SSatish Balay   return 0;
98991d316f6SSatish Balay }
99091d316f6SSatish Balay 
9919867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
9929867e207SSatish Balay {
9939867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9949867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
9957e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
9969867e207SSatish Balay 
9979867e207SSatish Balay   ai  = a->i;
9989867e207SSatish Balay   aj  = a->j;
9999867e207SSatish Balay   aa  = a->a;
10009867e207SSatish Balay   m   = a->m;
10019867e207SSatish Balay   n   = a->n;
10029867e207SSatish Balay   bs  = a->bs;
10039867e207SSatish Balay   mbs = a->mbs;
10047e67e3f9SSatish Balay   bs2 = bs*bs;
10059867e207SSatish Balay   if (ll) {
10069867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
10079867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
10089867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
10099867e207SSatish Balay       M  = ai[i+1] - ai[i];
10109867e207SSatish Balay       li = l + i*bs;
10117e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
10129867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
10137e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
10149867e207SSatish Balay           (*v++) *= li[k%bs];
10159867e207SSatish Balay         }
10169867e207SSatish Balay       }
10179867e207SSatish Balay     }
10189867e207SSatish Balay   }
10199867e207SSatish Balay 
10209867e207SSatish Balay   if (rr) {
10219867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
10229867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
10239867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
10249867e207SSatish Balay       M  = ai[i+1] - ai[i];
10257e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
10269867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
10279867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
10289867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
10299867e207SSatish Balay           x = ri[k];
10309867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
10319867e207SSatish Balay         }
10329867e207SSatish Balay       }
10339867e207SSatish Balay     }
10349867e207SSatish Balay   }
10359867e207SSatish Balay   return 0;
10369867e207SSatish Balay }
10379867e207SSatish Balay 
10389867e207SSatish Balay 
1039b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1040b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1041b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
10427fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
10437fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
10447fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
10457fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
10467fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
10477fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
10482593348eSBarry Smith 
1049b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
10502593348eSBarry Smith {
1051b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
10522593348eSBarry Smith   Scalar      *v = a->a;
10532593348eSBarry Smith   double      sum = 0.0;
1054b6490206SBarry Smith   int         i;
10552593348eSBarry Smith 
10562593348eSBarry Smith   if (type == NORM_FROBENIUS) {
10572593348eSBarry Smith     for (i=0; i<a->nz; i++ ) {
10582593348eSBarry Smith #if defined(PETSC_COMPLEX)
10592593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
10602593348eSBarry Smith #else
10612593348eSBarry Smith       sum += (*v)*(*v); v++;
10622593348eSBarry Smith #endif
10632593348eSBarry Smith     }
10642593348eSBarry Smith     *norm = sqrt(sum);
10652593348eSBarry Smith   }
10662593348eSBarry Smith   else {
1067b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
10682593348eSBarry Smith   }
10692593348eSBarry Smith   return 0;
10702593348eSBarry Smith }
10712593348eSBarry Smith 
10722593348eSBarry Smith /*
10732593348eSBarry Smith      note: This can only work for identity for row and col. It would
10742593348eSBarry Smith    be good to check this and otherwise generate an error.
10752593348eSBarry Smith */
1076b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
10772593348eSBarry Smith {
1078b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
10792593348eSBarry Smith   Mat         outA;
1080de6a44a3SBarry Smith   int         ierr;
10812593348eSBarry Smith 
1082b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
10832593348eSBarry Smith 
10842593348eSBarry Smith   outA          = inA;
10852593348eSBarry Smith   inA->factor   = FACTOR_LU;
10862593348eSBarry Smith   a->row        = row;
10872593348eSBarry Smith   a->col        = col;
10882593348eSBarry Smith 
1089de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
10902593348eSBarry Smith 
10912593348eSBarry Smith   if (!a->diag) {
1092b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
10932593348eSBarry Smith   }
10947fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
10952593348eSBarry Smith   return 0;
10962593348eSBarry Smith }
10972593348eSBarry Smith 
1098b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
10992593348eSBarry Smith {
1100b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1101b6490206SBarry Smith   int         one = 1, totalnz = a->bs*a->bs*a->nz;
1102b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1103b6490206SBarry Smith   PLogFlops(totalnz);
11042593348eSBarry Smith   return 0;
11052593348eSBarry Smith }
11062593348eSBarry Smith 
11077e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
11087e67e3f9SSatish Balay {
11097e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
11107e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
11117e67e3f9SSatish Balay   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
11127e67e3f9SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
11137e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
11147e67e3f9SSatish Balay 
11157e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
11167e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
11177e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
11187e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
11197e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
11207e67e3f9SSatish Balay     nrow = ailen[brow];
11217e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
11227e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
11237e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
11247e67e3f9SSatish Balay       col  = in[l] - shift;
11257e67e3f9SSatish Balay       bcol = col/bs;
11267e67e3f9SSatish Balay       cidx = col%bs;
11277e67e3f9SSatish Balay       ridx = row%bs;
11287e67e3f9SSatish Balay       high = nrow;
11297e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
11307e67e3f9SSatish Balay       while (high-low > 5) {
11317e67e3f9SSatish Balay         t = (low+high)/2;
11327e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
11337e67e3f9SSatish Balay         else             low  = t;
11347e67e3f9SSatish Balay       }
11357e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
11367e67e3f9SSatish Balay         if (rp[i] > bcol) break;
11377e67e3f9SSatish Balay         if (rp[i] == bcol) {
11387e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
11397e67e3f9SSatish Balay           goto finished;
11407e67e3f9SSatish Balay         }
11417e67e3f9SSatish Balay       }
11427e67e3f9SSatish Balay       *v++ = zero;
11437e67e3f9SSatish Balay       finished:;
11447e67e3f9SSatish Balay     }
11457e67e3f9SSatish Balay   }
11467e67e3f9SSatish Balay   return 0;
11477e67e3f9SSatish Balay }
11487e67e3f9SSatish Balay 
1149b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A)
11502593348eSBarry Smith {
11512593348eSBarry Smith   static int called = 0;
11522593348eSBarry Smith 
11532593348eSBarry Smith   if (called) return 0; else called = 1;
11542593348eSBarry Smith   return 0;
11552593348eSBarry Smith }
11562593348eSBarry Smith /* -------------------------------------------------------------------*/
1157cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
11589867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1159*119a934aSSatish Balay        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1160*119a934aSSatish Balay        0,MatMultTransAdd_SeqBAIJ,
1161de6a44a3SBarry Smith        MatSolve_SeqBAIJ,0,
1162b6490206SBarry Smith        0,0,
1163de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1164b6490206SBarry Smith        0,
1165f2501298SSatish Balay        MatTranspose_SeqBAIJ,
116617e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
11679867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1168584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1169b6490206SBarry Smith        0,
1170b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1171b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
11727fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1173b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1174de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1175b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1176b6490206SBarry Smith        0,0,
1177b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1178b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1179b6490206SBarry Smith        0,0,
11807e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1181b6490206SBarry Smith        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1182b6490206SBarry Smith        0};
11832593348eSBarry Smith 
11842593348eSBarry Smith /*@C
118544cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
118644cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
118744cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
11882593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
11892593348eSBarry Smith    increased by more than a factor of 50.
11902593348eSBarry Smith 
11912593348eSBarry Smith    Input Parameters:
11922593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1193b6490206SBarry Smith .  bs - size of block
11942593348eSBarry Smith .  m - number of rows
11952593348eSBarry Smith .  n - number of columns
1196b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
1197b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
1198b6490206SBarry Smith          (possibly different for each row)
11992593348eSBarry Smith 
12002593348eSBarry Smith    Output Parameter:
12012593348eSBarry Smith .  A - the matrix
12022593348eSBarry Smith 
12032593348eSBarry Smith    Notes:
120444cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
12052593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
120644cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
12072593348eSBarry Smith 
12082593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
12092593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
12102593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
12112593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
12122593348eSBarry Smith 
121344cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
12142593348eSBarry Smith @*/
1215b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
12162593348eSBarry Smith {
12172593348eSBarry Smith   Mat         B;
1218b6490206SBarry Smith   Mat_SeqBAIJ *b;
1219f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1220b6490206SBarry Smith 
1221f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1222f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
12232593348eSBarry Smith 
12242593348eSBarry Smith   *A = 0;
1225b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
12262593348eSBarry Smith   PLogObjectCreate(B);
1227b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
122844cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
12292593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
12307fc0212eSBarry Smith   switch (bs) {
12317fc0212eSBarry Smith     case 1:
12327fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
12337fc0212eSBarry Smith       break;
12344eeb42bcSBarry Smith     case 2:
12354eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
12366d84be18SBarry Smith       break;
1237f327dd97SBarry Smith     case 3:
1238f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
12394eeb42bcSBarry Smith       break;
12406d84be18SBarry Smith     case 4:
12416d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
12426d84be18SBarry Smith       break;
12436d84be18SBarry Smith     case 5:
12446d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
12456d84be18SBarry Smith       break;
12467fc0212eSBarry Smith   }
1247b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1248b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
12492593348eSBarry Smith   B->factor           = 0;
12502593348eSBarry Smith   B->lupivotthreshold = 1.0;
12512593348eSBarry Smith   b->row              = 0;
12522593348eSBarry Smith   b->col              = 0;
12532593348eSBarry Smith   b->reallocs         = 0;
12542593348eSBarry Smith 
125544cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
125644cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1257b6490206SBarry Smith   b->mbs     = mbs;
1258f2501298SSatish Balay   b->nbs     = nbs;
1259b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
12602593348eSBarry Smith   if (nnz == PETSC_NULL) {
1261*119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
12622593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1263b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1264b6490206SBarry Smith     nz = nz*mbs;
12652593348eSBarry Smith   }
12662593348eSBarry Smith   else {
12672593348eSBarry Smith     nz = 0;
1268b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
12692593348eSBarry Smith   }
12702593348eSBarry Smith 
12712593348eSBarry Smith   /* allocate the matrix space */
12727e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
12732593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
12747e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
12757e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
12762593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
12772593348eSBarry Smith   b->i  = b->j + nz;
12782593348eSBarry Smith   b->singlemalloc = 1;
12792593348eSBarry Smith 
1280de6a44a3SBarry Smith   b->i[0] = 0;
1281b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
12822593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
12832593348eSBarry Smith   }
12842593348eSBarry Smith 
1285b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1286b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1287b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1288b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
12892593348eSBarry Smith 
1290b6490206SBarry Smith   b->bs               = bs;
1291b6490206SBarry Smith   b->mbs              = mbs;
12922593348eSBarry Smith   b->nz               = 0;
12932593348eSBarry Smith   b->maxnz            = nz;
12942593348eSBarry Smith   b->sorted           = 0;
12952593348eSBarry Smith   b->roworiented      = 1;
12962593348eSBarry Smith   b->nonew            = 0;
12972593348eSBarry Smith   b->diag             = 0;
12982593348eSBarry Smith   b->solve_work       = 0;
1299de6a44a3SBarry Smith   b->mult_work        = 0;
13002593348eSBarry Smith   b->spptr            = 0;
13011073c447SSatish Balay   b->indexshift       = 0;
13022593348eSBarry Smith   *A = B;
13032593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
13042593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
13052593348eSBarry Smith   return 0;
13062593348eSBarry Smith }
13072593348eSBarry Smith 
1308b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
13092593348eSBarry Smith {
13102593348eSBarry Smith   Mat         C;
1311b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
13127e67e3f9SSatish Balay   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1313de6a44a3SBarry Smith 
1314de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
13152593348eSBarry Smith 
13162593348eSBarry Smith   *B = 0;
1317b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
13182593348eSBarry Smith   PLogObjectCreate(C);
1319b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
13202593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1321b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1322b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
13232593348eSBarry Smith   C->factor     = A->factor;
13242593348eSBarry Smith   c->row        = 0;
13252593348eSBarry Smith   c->col        = 0;
13262593348eSBarry Smith   C->assembled  = PETSC_TRUE;
13272593348eSBarry Smith 
132844cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
132944cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
133044cd7ae7SLois Curfman McInnes   C->M          = a->m;
133144cd7ae7SLois Curfman McInnes   C->N          = a->n;
133244cd7ae7SLois Curfman McInnes 
1333b6490206SBarry Smith   c->bs         = a->bs;
1334b6490206SBarry Smith   c->mbs        = a->mbs;
1335b6490206SBarry Smith   c->nbs        = a->nbs;
13361073c447SSatish Balay   c->indexshift = 0;
13372593348eSBarry Smith 
1338b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1339b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1340b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
13412593348eSBarry Smith     c->imax[i] = a->imax[i];
13422593348eSBarry Smith     c->ilen[i] = a->ilen[i];
13432593348eSBarry Smith   }
13442593348eSBarry Smith 
13452593348eSBarry Smith   /* allocate the matrix space */
13462593348eSBarry Smith   c->singlemalloc = 1;
13477e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
13482593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
13497e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1350de6a44a3SBarry Smith   c->i  = c->j + nz;
1351b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1352b6490206SBarry Smith   if (mbs > 0) {
1353de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
13542593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
13557e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
13562593348eSBarry Smith     }
13572593348eSBarry Smith   }
13582593348eSBarry Smith 
1359b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
13602593348eSBarry Smith   c->sorted      = a->sorted;
13612593348eSBarry Smith   c->roworiented = a->roworiented;
13622593348eSBarry Smith   c->nonew       = a->nonew;
13632593348eSBarry Smith 
13642593348eSBarry Smith   if (a->diag) {
1365b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1366b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1367b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
13682593348eSBarry Smith       c->diag[i] = a->diag[i];
13692593348eSBarry Smith     }
13702593348eSBarry Smith   }
13712593348eSBarry Smith   else c->diag          = 0;
13722593348eSBarry Smith   c->nz                 = a->nz;
13732593348eSBarry Smith   c->maxnz              = a->maxnz;
13742593348eSBarry Smith   c->solve_work         = 0;
13752593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
13767fc0212eSBarry Smith   c->mult_work          = 0;
13772593348eSBarry Smith   *B = C;
13782593348eSBarry Smith   return 0;
13792593348eSBarry Smith }
13802593348eSBarry Smith 
138119bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
13822593348eSBarry Smith {
1383b6490206SBarry Smith   Mat_SeqBAIJ  *a;
13842593348eSBarry Smith   Mat          B;
1385de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1386b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
138735aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1388de6a44a3SBarry Smith   int          *masked, nmask,tmp,ishift,bs2;
1389b6490206SBarry Smith   Scalar       *aa;
139019bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
13912593348eSBarry Smith 
139235aab85fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1393de6a44a3SBarry Smith   bs2  = bs*bs;
1394b6490206SBarry Smith 
13952593348eSBarry Smith   MPI_Comm_size(comm,&size);
1396b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
139790ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
139877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1399de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
14002593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
14012593348eSBarry Smith 
1402b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
140335aab85fSBarry Smith 
140435aab85fSBarry Smith   /*
140535aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
140635aab85fSBarry Smith     divisible by the blocksize
140735aab85fSBarry Smith   */
1408b6490206SBarry Smith   mbs        = M/bs;
140935aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
141035aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
141135aab85fSBarry Smith   else                  mbs++;
141235aab85fSBarry Smith   if (extra_rows) {
141335aab85fSBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
141435aab85fSBarry Smith   }
1415b6490206SBarry Smith 
14162593348eSBarry Smith   /* read in row lengths */
141735aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
141877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
141935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
14202593348eSBarry Smith 
1421b6490206SBarry Smith   /* read in column indices */
142235aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
142377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
142435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1425b6490206SBarry Smith 
1426b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1427b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1428b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
142935aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
143035aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
143135aab85fSBarry Smith   masked = mask + mbs;
1432b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1433b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
143435aab85fSBarry Smith     nmask = 0;
1435b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1436b6490206SBarry Smith       kmax = rowlengths[rowcount];
1437b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
143835aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
143935aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1440b6490206SBarry Smith       }
1441b6490206SBarry Smith       rowcount++;
1442b6490206SBarry Smith     }
144335aab85fSBarry Smith     browlengths[i] += nmask;
144435aab85fSBarry Smith     /* zero out the mask elements we set */
144535aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1446b6490206SBarry Smith   }
1447b6490206SBarry Smith 
14482593348eSBarry Smith   /* create our matrix */
144935aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
145035aab85fSBarry Smith          CHKERRQ(ierr);
14512593348eSBarry Smith   B = *A;
1452b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
14532593348eSBarry Smith 
14542593348eSBarry Smith   /* set matrix "i" values */
1455de6a44a3SBarry Smith   a->i[0] = 0;
1456b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1457b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1458b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
14592593348eSBarry Smith   }
14609867e207SSatish Balay   a->indexshift = 0;
1461b6490206SBarry Smith   a->nz         = 0;
1462b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
14632593348eSBarry Smith 
1464b6490206SBarry Smith   /* read in nonzero values */
146535aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
146677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
146735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1468b6490206SBarry Smith 
1469b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1470b6490206SBarry Smith   nzcount = 0; jcount = 0;
1471b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1472b6490206SBarry Smith     nzcountb = nzcount;
147335aab85fSBarry Smith     nmask    = 0;
1474b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1475b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1476b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
147735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
147835aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1479b6490206SBarry Smith       }
1480b6490206SBarry Smith       rowcount++;
1481b6490206SBarry Smith     }
1482de6a44a3SBarry Smith     /* sort the masked values */
148377c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1484de6a44a3SBarry Smith 
1485b6490206SBarry Smith     /* set "j" values into matrix */
1486b6490206SBarry Smith     maskcount = 1;
148735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
148835aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1489de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1490b6490206SBarry Smith     }
1491b6490206SBarry Smith     /* set "a" values into matrix */
1492de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1493b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1494b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1495b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1496de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1497de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1498de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1499de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1500b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1501b6490206SBarry Smith       }
1502b6490206SBarry Smith     }
150335aab85fSBarry Smith     /* zero out the mask elements we set */
150435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1505b6490206SBarry Smith   }
150635aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1507b6490206SBarry Smith 
1508b6490206SBarry Smith   PetscFree(rowlengths);
1509b6490206SBarry Smith   PetscFree(browlengths);
1510b6490206SBarry Smith   PetscFree(aa);
1511b6490206SBarry Smith   PetscFree(jj);
1512b6490206SBarry Smith   PetscFree(mask);
1513b6490206SBarry Smith 
1514b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1515de6a44a3SBarry Smith 
1516de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1517de6a44a3SBarry Smith   if (flg) {
151819bcc07fSBarry Smith     Viewer tviewer;
151919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
152090ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
152119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
152219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1523de6a44a3SBarry Smith   }
1524de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1525de6a44a3SBarry Smith   if (flg) {
152619bcc07fSBarry Smith     Viewer tviewer;
152719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
152890ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
152919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
153019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1531de6a44a3SBarry Smith   }
1532de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1533de6a44a3SBarry Smith   if (flg) {
153419bcc07fSBarry Smith     Viewer tviewer;
153519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
153619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
153719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1538de6a44a3SBarry Smith   }
1539de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1540de6a44a3SBarry Smith   if (flg) {
154119bcc07fSBarry Smith     Viewer tviewer;
154219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
154390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
154419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
154519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1546de6a44a3SBarry Smith   }
1547de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1548de6a44a3SBarry Smith   if (flg) {
154919bcc07fSBarry Smith     Viewer tviewer;
155019bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
155119bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
155219bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
155319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1554de6a44a3SBarry Smith   }
15552593348eSBarry Smith   return 0;
15562593348eSBarry Smith }
15572593348eSBarry Smith 
15582593348eSBarry Smith 
15592593348eSBarry Smith 
1560