xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1a6a6d987255f88adf325b5bb4a1e46559915718)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*1a6a6d98SBarry Smith static char vcid[] = "$Id: baij.c,v 1.39 1996/04/16 13:48:15 balay Exp bsmith $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
11*1a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
12*1a6a6d98SBarry Smith #include "src/inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
143b547af2SSatish Balay 
15bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41a7c10996SSatish Balay   ishift = 0;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
44*1a6a6d98SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
45*1a6a6d98SBarry Smith     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
462593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
47bcd2baecSBarry Smith   } else {
48bcd2baecSBarry Smith     if (ishift == oshift) {
49*1a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50bcd2baecSBarry Smith     }
51bcd2baecSBarry Smith     else if (ishift == -1) {
52bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
53*1a6a6d98SBarry Smith       int nz = a->i[n] - 1;
54bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
55*1a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
56*1a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
58*1a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
59bcd2baecSBarry Smith     } else {
60bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
61*1a6a6d98SBarry Smith       int nz = a->i[n] - 1;
62bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
63*1a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
64*1a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
66*1a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
67bcd2baecSBarry Smith     }
68bcd2baecSBarry Smith   }
692593348eSBarry Smith   return 0;
702593348eSBarry Smith }
712593348eSBarry Smith 
72de6a44a3SBarry Smith /*
73de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
74de6a44a3SBarry Smith */
75de6a44a3SBarry Smith 
76de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
77de6a44a3SBarry Smith {
78de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
797fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
80de6a44a3SBarry Smith 
81de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
837fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
84de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85de6a44a3SBarry Smith       if (a->j[j] == i) {
86de6a44a3SBarry Smith         diag[i] = j;
87de6a44a3SBarry Smith         break;
88de6a44a3SBarry Smith       }
89de6a44a3SBarry Smith     }
90de6a44a3SBarry Smith   }
91de6a44a3SBarry Smith   a->diag = diag;
92de6a44a3SBarry Smith   return 0;
93de6a44a3SBarry Smith }
942593348eSBarry Smith 
952593348eSBarry Smith #include "draw.h"
962593348eSBarry Smith #include "pinclude/pviewer.h"
9777c4ece6SBarry Smith #include "sys.h"
982593348eSBarry Smith 
99b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1002593348eSBarry Smith {
101b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1029df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103b6490206SBarry Smith   Scalar      *aa;
1042593348eSBarry Smith 
10590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1062593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1072593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1082593348eSBarry Smith   col_lens[1] = a->m;
1092593348eSBarry Smith   col_lens[2] = a->n;
1107e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1112593348eSBarry Smith 
1122593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
113b6490206SBarry Smith   count = 0;
114b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
115b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
116b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117b6490206SBarry Smith     }
1182593348eSBarry Smith   }
11977c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1202593348eSBarry Smith   PetscFree(col_lens);
1212593348eSBarry Smith 
1222593348eSBarry Smith   /* store column indices (zero start index) */
1237e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124b6490206SBarry Smith   count = 0;
125b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
126b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
127b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
129b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1302593348eSBarry Smith         }
1312593348eSBarry Smith       }
132b6490206SBarry Smith     }
133b6490206SBarry Smith   }
1347e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135b6490206SBarry Smith   PetscFree(jj);
1362593348eSBarry Smith 
1372593348eSBarry Smith   /* store nonzero values */
1387e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139b6490206SBarry Smith   count = 0;
140b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
141b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
142b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1447e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
145b6490206SBarry Smith         }
146b6490206SBarry Smith       }
147b6490206SBarry Smith     }
148b6490206SBarry Smith   }
1497e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150b6490206SBarry Smith   PetscFree(aa);
1512593348eSBarry Smith   return 0;
1522593348eSBarry Smith }
1532593348eSBarry Smith 
154b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1552593348eSBarry Smith {
156b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1579df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1582593348eSBarry Smith   FILE        *fd;
1592593348eSBarry Smith   char        *outputname;
1602593348eSBarry Smith 
16190ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1622593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16390ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1647ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1657ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1662593348eSBarry Smith   }
16790ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
168b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1692593348eSBarry Smith   }
17044cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17144cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17244cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17344cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17444cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17544cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17644cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17744cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17844cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17944cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18144cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18244cd7ae7SLois Curfman McInnes #else
18344cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18444cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18544cd7ae7SLois Curfman McInnes #endif
18644cd7ae7SLois Curfman McInnes           }
18744cd7ae7SLois Curfman McInnes         }
18844cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18944cd7ae7SLois Curfman McInnes       }
19044cd7ae7SLois Curfman McInnes     }
19144cd7ae7SLois Curfman McInnes   }
1922593348eSBarry Smith   else {
193b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
194b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
195b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
196b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19888685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1997e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20088685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2017e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20288685aaeSLois Curfman McInnes           }
20388685aaeSLois Curfman McInnes           else {
2047e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20588685aaeSLois Curfman McInnes           }
20688685aaeSLois Curfman McInnes #else
2077e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20888685aaeSLois Curfman McInnes #endif
2092593348eSBarry Smith           }
2102593348eSBarry Smith         }
2112593348eSBarry Smith         fprintf(fd,"\n");
2122593348eSBarry Smith       }
2132593348eSBarry Smith     }
214b6490206SBarry Smith   }
2152593348eSBarry Smith   fflush(fd);
2162593348eSBarry Smith   return 0;
2172593348eSBarry Smith }
2182593348eSBarry Smith 
219b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
2202593348eSBarry Smith {
2212593348eSBarry Smith   Mat         A = (Mat) obj;
22219bcc07fSBarry Smith   ViewerType  vtype;
22319bcc07fSBarry Smith   int         ierr;
2242593348eSBarry Smith 
2252593348eSBarry Smith   if (!viewer) {
22619bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2272593348eSBarry Smith   }
22819bcc07fSBarry Smith 
22919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
23019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
231b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2322593348eSBarry Smith   }
23319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2352593348eSBarry Smith   }
23619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
237b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2382593348eSBarry Smith   }
23919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
240b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2412593348eSBarry Smith   }
2422593348eSBarry Smith   return 0;
2432593348eSBarry Smith }
244b6490206SBarry Smith 
245119a934aSSatish Balay #define CHUNKSIZE  10
246cd0e1443SSatish Balay 
247cd0e1443SSatish Balay /* This version has row oriented v  */
248cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249cd0e1443SSatish Balay {
250cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
2549df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
255cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
256cd0e1443SSatish Balay 
257cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
258cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
259cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
263cd0e1443SSatish Balay     low = 0;
264cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
265cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
268cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
269cd0e1443SSatish Balay       if (roworiented) {
270cd0e1443SSatish Balay         value = *v++;
271cd0e1443SSatish Balay       }
272cd0e1443SSatish Balay       else {
273cd0e1443SSatish Balay         value = v[k + l*m];
274cd0e1443SSatish Balay       }
275cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
276cd0e1443SSatish Balay       while (high-low > 5) {
277cd0e1443SSatish Balay         t = (low+high)/2;
278cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
279cd0e1443SSatish Balay         else             low  = t;
280cd0e1443SSatish Balay       }
281cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
282cd0e1443SSatish Balay         if (rp[i] > bcol) break;
283cd0e1443SSatish Balay         if (rp[i] == bcol) {
2847e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
285cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
286cd0e1443SSatish Balay           else                  *bap  = value;
287cd0e1443SSatish Balay           goto noinsert;
288cd0e1443SSatish Balay         }
289cd0e1443SSatish Balay       }
290cd0e1443SSatish Balay       if (nonew) goto noinsert;
291cd0e1443SSatish Balay       if (nrow >= rmax) {
292cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
293cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294cd0e1443SSatish Balay         Scalar *new_a;
295cd0e1443SSatish Balay 
296cd0e1443SSatish Balay         /* malloc new storage space */
2977e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
2997e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
300cd0e1443SSatish Balay         new_i   = new_j + new_nz;
301cd0e1443SSatish Balay 
302cd0e1443SSatish Balay         /* copy over old data into new slots */
303cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
3047e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308cd0e1443SSatish Balay                                                            len*sizeof(int));
309a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313cd0e1443SSatish Balay         /* free up old matrix storage */
314cd0e1443SSatish Balay         PetscFree(a->a);
315cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317cd0e1443SSatish Balay         a->singlemalloc = 1;
318cd0e1443SSatish Balay 
319a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3217e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
322119a934aSSatish Balay         a->maxnz += CHUNKSIZE;
323cd0e1443SSatish Balay         a->reallocs++;
324119a934aSSatish Balay         a->nz++;
325cd0e1443SSatish Balay       }
3267e67e3f9SSatish Balay       N = nrow++ - 1;
327cd0e1443SSatish Balay       /* shift up all the later entries in this row */
328cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
329cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3307e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331cd0e1443SSatish Balay       }
3327e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333cd0e1443SSatish Balay       rp[i] = bcol;
3347e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
335cd0e1443SSatish Balay       noinsert:;
336cd0e1443SSatish Balay       low = i;
337cd0e1443SSatish Balay     }
338cd0e1443SSatish Balay     ailen[brow] = nrow;
339cd0e1443SSatish Balay   }
340cd0e1443SSatish Balay   return 0;
341cd0e1443SSatish Balay }
342cd0e1443SSatish Balay 
3430b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3440b824a48SSatish Balay {
3450b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3460b824a48SSatish Balay   *m = a->m; *n = a->n;
3470b824a48SSatish Balay   return 0;
3480b824a48SSatish Balay }
3490b824a48SSatish Balay 
3500b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3510b824a48SSatish Balay {
3520b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3530b824a48SSatish Balay   *m = 0; *n = a->m;
3540b824a48SSatish Balay   return 0;
3550b824a48SSatish Balay }
3560b824a48SSatish Balay 
3579867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3589867e207SSatish Balay {
3599867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3607e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3619867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3629867e207SSatish Balay 
3639867e207SSatish Balay   bs  = a->bs;
3649867e207SSatish Balay   ai  = a->i;
3659867e207SSatish Balay   aj  = a->j;
3669867e207SSatish Balay   aa  = a->a;
3679df24120SSatish Balay   bs2 = a->bs2;
3689867e207SSatish Balay 
3699867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3709867e207SSatish Balay 
3719867e207SSatish Balay   bn  = row/bs;   /* Block number */
3729867e207SSatish Balay   bp  = row % bs; /* Block Position */
3739867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3749867e207SSatish Balay   *nz = bs*M;
3759867e207SSatish Balay 
3769867e207SSatish Balay   if (v) {
3779867e207SSatish Balay     *v = 0;
3789867e207SSatish Balay     if (*nz) {
3799867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3809867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3819867e207SSatish Balay         v_i  = *v + i*bs;
3827e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3837e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3849867e207SSatish Balay       }
3859867e207SSatish Balay     }
3869867e207SSatish Balay   }
3879867e207SSatish Balay 
3889867e207SSatish Balay   if (idx) {
3899867e207SSatish Balay     *idx = 0;
3909867e207SSatish Balay     if (*nz) {
3919867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3929867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3939867e207SSatish Balay         idx_i = *idx + i*bs;
3949867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3959867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3969867e207SSatish Balay       }
3979867e207SSatish Balay     }
3989867e207SSatish Balay   }
3999867e207SSatish Balay   return 0;
4009867e207SSatish Balay }
4019867e207SSatish Balay 
4029867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4039867e207SSatish Balay {
4049867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
4059867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
4069867e207SSatish Balay   return 0;
4079867e207SSatish Balay }
408b6490206SBarry Smith 
409f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410f2501298SSatish Balay {
411f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412f2501298SSatish Balay   Mat         C;
413f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
4149df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
415f2501298SSatish Balay   Scalar      *array=a->a;
416f2501298SSatish Balay 
417f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
418f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
421a7c10996SSatish Balay 
422a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424f2501298SSatish Balay   PetscFree(col);
425f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426f2501298SSatish Balay   cols = rows + bs;
427f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
428f2501298SSatish Balay     cols[0] = i*bs;
429f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430f2501298SSatish Balay     len = ai[i+1] - ai[i];
431f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
432f2501298SSatish Balay       rows[0] = (*aj++)*bs;
433f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435f2501298SSatish Balay       array += bs2;
436f2501298SSatish Balay     }
437f2501298SSatish Balay   }
4381073c447SSatish Balay   PetscFree(rows);
439f2501298SSatish Balay 
440f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442f2501298SSatish Balay 
443f2501298SSatish Balay   if (B != PETSC_NULL) {
444f2501298SSatish Balay     *B = C;
445f2501298SSatish Balay   } else {
446f2501298SSatish Balay     /* This isn't really an in-place transpose */
447f2501298SSatish Balay     PetscFree(a->a);
448f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
450f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
451f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
452f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
453f2501298SSatish Balay     PetscFree(a);
454f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
455f2501298SSatish Balay     PetscHeaderDestroy(C);
456f2501298SSatish Balay   }
457f2501298SSatish Balay   return 0;
458f2501298SSatish Balay }
459f2501298SSatish Balay 
460f2501298SSatish Balay 
461584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462584200bdSSatish Balay {
463584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
4669df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
467584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
468584200bdSSatish Balay 
469584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
470584200bdSSatish Balay 
471584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
472584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
473584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
474584200bdSSatish Balay     if (fshift) {
475a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476584200bdSSatish Balay       N = ailen[i];
477584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
478584200bdSSatish Balay         ip[j-fshift] = ip[j];
4797e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480584200bdSSatish Balay       }
481584200bdSSatish Balay     }
482584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
483584200bdSSatish Balay   }
484584200bdSSatish Balay   if (mbs) {
485584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
486584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487584200bdSSatish Balay   }
488584200bdSSatish Balay   /* reset ilen and imax for each row */
489584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
490584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
491584200bdSSatish Balay   }
492a7c10996SSatish Balay   a->nz = ai[mbs];
493584200bdSSatish Balay 
494584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
495584200bdSSatish Balay   if (fshift && a->diag) {
496584200bdSSatish Balay     PetscFree(a->diag);
497584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
498584200bdSSatish Balay     a->diag = 0;
499584200bdSSatish Balay   }
50067790700SSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502584200bdSSatish Balay            a->reallocs);
503584200bdSSatish Balay   return 0;
504584200bdSSatish Balay }
505584200bdSSatish Balay 
506b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
5072593348eSBarry Smith {
508b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5099df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
5102593348eSBarry Smith   return 0;
5112593348eSBarry Smith }
5122593348eSBarry Smith 
513b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
5142593348eSBarry Smith {
5152593348eSBarry Smith   Mat         A  = (Mat) obj;
516b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5172593348eSBarry Smith 
5182593348eSBarry Smith #if defined(PETSC_LOG)
5192593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5202593348eSBarry Smith #endif
5212593348eSBarry Smith   PetscFree(a->a);
5222593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5232593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5242593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5252593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5262593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
527de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5282593348eSBarry Smith   PetscFree(a);
529f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
530f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5312593348eSBarry Smith   return 0;
5322593348eSBarry Smith }
5332593348eSBarry Smith 
534b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5352593348eSBarry Smith {
536b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5372593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5382593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5392593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5402593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5412593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5422593348eSBarry Smith   else if (op == ROWS_SORTED ||
5432593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5442593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5452593348eSBarry Smith            op == YES_NEW_DIAGONALS)
54694a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5472593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
548b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5492593348eSBarry Smith   else
550b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5512593348eSBarry Smith   return 0;
5522593348eSBarry Smith }
5532593348eSBarry Smith 
5542593348eSBarry Smith 
5552593348eSBarry Smith /* -------------------------------------------------------*/
5562593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5572593348eSBarry Smith /* -------------------------------------------------------*/
558b6490206SBarry Smith #include "pinclude/plapack.h"
559b6490206SBarry Smith 
560*1a6a6d98SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz)
5612593348eSBarry Smith {
562b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
563*1a6a6d98SBarry Smith   Scalar          *xg,*zg;
564bb42667fSSatish Balay   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
565b6490206SBarry Smith   register Scalar x1,x2,x3,x4,x5;
566*1a6a6d98SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
5679df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,ierr;
5682593348eSBarry Smith 
569bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
570bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
571b6490206SBarry Smith 
572119a934aSSatish Balay   idx   = a->j;
573119a934aSSatish Balay   v     = a->a;
574119a934aSSatish Balay   ii    = a->i;
575119a934aSSatish Balay 
576119a934aSSatish Balay   switch (bs) {
577119a934aSSatish Balay   case 1:
578119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
579119a934aSSatish Balay       n    = ii[1] - ii[0]; ii++;
580119a934aSSatish Balay       sum  = 0.0;
581119a934aSSatish Balay       while (n--) sum += *v++ * x[*idx++];
582*1a6a6d98SBarry Smith       z[i] = sum;
583119a934aSSatish Balay     }
584119a934aSSatish Balay     break;
585119a934aSSatish Balay   case 2:
586119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
587119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
588119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0;
589119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
590119a934aSSatish Balay         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
591119a934aSSatish Balay         sum1 += v[0]*x1 + v[2]*x2;
592119a934aSSatish Balay         sum2 += v[1]*x1 + v[3]*x2;
593119a934aSSatish Balay         v += 4;
594119a934aSSatish Balay       }
595*1a6a6d98SBarry Smith       z[0] = sum1; z[1] = sum2;
596119a934aSSatish Balay       z += 2;
597119a934aSSatish Balay     }
598119a934aSSatish Balay     break;
599119a934aSSatish Balay   case 3:
600119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
601119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
602119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
603119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
604119a934aSSatish Balay         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
605119a934aSSatish Balay         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
606119a934aSSatish Balay         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
607119a934aSSatish Balay         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
608119a934aSSatish Balay         v += 9;
609119a934aSSatish Balay       }
610*1a6a6d98SBarry Smith       z[0] = sum1; z[1] = sum2; z[2] = sum3;
611119a934aSSatish Balay       z += 3;
612119a934aSSatish Balay     }
613119a934aSSatish Balay     break;
614119a934aSSatish Balay   case 4:
615119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
616119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
617119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
618119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
619119a934aSSatish Balay         xb = x + 4*(*idx++);
620119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
621119a934aSSatish Balay         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
622119a934aSSatish Balay         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
623119a934aSSatish Balay         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
624119a934aSSatish Balay         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
625119a934aSSatish Balay         v += 16;
626119a934aSSatish Balay       }
627*1a6a6d98SBarry Smith       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
628119a934aSSatish Balay       z += 4;
629119a934aSSatish Balay     }
630119a934aSSatish Balay     break;
631119a934aSSatish Balay   case 5:
632119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
633119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
634119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
635119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
636119a934aSSatish Balay         xb = x + 5*(*idx++);
637119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
638119a934aSSatish Balay         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
639119a934aSSatish Balay         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
640119a934aSSatish Balay         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
641119a934aSSatish Balay         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
642119a934aSSatish Balay         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
643119a934aSSatish Balay         v += 25;
644119a934aSSatish Balay       }
645*1a6a6d98SBarry Smith       z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
646119a934aSSatish Balay       z += 5;
647119a934aSSatish Balay     }
648119a934aSSatish Balay     break;
649119a934aSSatish Balay     /* block sizes larger then 5 by 5 are handled by BLAS */
650119a934aSSatish Balay   default: {
651*1a6a6d98SBarry Smith       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
652119a934aSSatish Balay       if (!a->mult_work) {
6533b547af2SSatish Balay         k = PetscMax(a->m,a->n);
654bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
655119a934aSSatish Balay         CHKPTRQ(a->mult_work);
656119a934aSSatish Balay       }
657119a934aSSatish Balay       work = a->mult_work;
658119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
659119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
660119a934aSSatish Balay         ncols = n*bs;
661119a934aSSatish Balay         workt = work;
662119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
663119a934aSSatish Balay           xb = x + bs*(*idx++);
664119a934aSSatish Balay           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
665119a934aSSatish Balay           workt += bs;
666119a934aSSatish Balay         }
667*1a6a6d98SBarry Smith         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
668119a934aSSatish Balay         v += n*bs2;
669119a934aSSatish Balay         z += bs;
670119a934aSSatish Balay       }
671119a934aSSatish Balay     }
672119a934aSSatish Balay   }
6730419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
6740419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
675*1a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
676bb42667fSSatish Balay   return 0;
677bb42667fSSatish Balay }
678bb42667fSSatish Balay 
679*1a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
680bb42667fSSatish Balay {
681bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
682*1a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
683bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
684*1a6a6d98SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
6859df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
686119a934aSSatish Balay 
687119a934aSSatish Balay 
688bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
689bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
690bb42667fSSatish Balay 
691119a934aSSatish Balay   idx   = a->j;
692119a934aSSatish Balay   v     = a->a;
693119a934aSSatish Balay   ii    = a->i;
694119a934aSSatish Balay 
695119a934aSSatish Balay   switch (bs) {
696119a934aSSatish Balay   case 1:
697119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
698119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
699119a934aSSatish Balay       xb = x + i; x1 = xb[0];
700119a934aSSatish Balay       ib = idx + ai[i];
701119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
702*1a6a6d98SBarry Smith         z[ib[j]] = *v++ * x1;
703119a934aSSatish Balay       }
704119a934aSSatish Balay     }
705119a934aSSatish Balay     break;
706119a934aSSatish Balay   case 2:
707119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
708119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
709119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
710119a934aSSatish Balay       ib = idx + ai[i];
711119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
712119a934aSSatish Balay         rval = ib[j]*2;
713*1a6a6d98SBarry Smith         z[rval++] = v[0]*x1 + v[1]*x2;
714*1a6a6d98SBarry Smith         z[rval++] = v[2]*x1 + v[3]*x2;
715119a934aSSatish Balay         v += 4;
716119a934aSSatish Balay       }
717119a934aSSatish Balay     }
718119a934aSSatish Balay     break;
719119a934aSSatish Balay   case 3:
720119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
721119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
722119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
723119a934aSSatish Balay       ib = idx + ai[i];
724119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
725119a934aSSatish Balay         rval = ib[j]*3;
726*1a6a6d98SBarry Smith         z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3;
727*1a6a6d98SBarry Smith         z[rval++] = v[3]*x1 + v[4]*x2 + v[5]*x3;
728*1a6a6d98SBarry Smith         z[rval++] = v[6]*x1 + v[7]*x2 + v[8]*x3;
729119a934aSSatish Balay         v += 9;
730119a934aSSatish Balay       }
731119a934aSSatish Balay     }
732119a934aSSatish Balay     break;
733119a934aSSatish Balay   case 4:
734119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
735119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
736119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
737119a934aSSatish Balay       ib = idx + ai[i];
738119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
739119a934aSSatish Balay         rval = ib[j]*4;
740*1a6a6d98SBarry Smith         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
741*1a6a6d98SBarry Smith         z[rval++] =  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
742*1a6a6d98SBarry Smith         z[rval++] =  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
743*1a6a6d98SBarry Smith         z[rval++] = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
744119a934aSSatish Balay         v += 16;
745119a934aSSatish Balay       }
746119a934aSSatish Balay     }
747119a934aSSatish Balay     break;
748119a934aSSatish Balay   case 5:
749119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
750119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
751119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
752119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
753119a934aSSatish Balay       ib = idx + ai[i];
754119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
755119a934aSSatish Balay         rval = ib[j]*5;
756*1a6a6d98SBarry Smith         z[rval++] =  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
757*1a6a6d98SBarry Smith         z[rval++] =  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
758*1a6a6d98SBarry Smith         z[rval++] = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
759*1a6a6d98SBarry Smith         z[rval++] = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
760*1a6a6d98SBarry Smith         z[rval++] = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
761119a934aSSatish Balay         v += 25;
762119a934aSSatish Balay       }
763119a934aSSatish Balay     }
764119a934aSSatish Balay     break;
765119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
766119a934aSSatish Balay     default: {
767119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
768119a934aSSatish Balay       if (!a->mult_work) {
7693b547af2SSatish Balay         k = PetscMax(a->m,a->n);
770bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
771119a934aSSatish Balay         CHKPTRQ(a->mult_work);
772119a934aSSatish Balay       }
773119a934aSSatish Balay       work = a->mult_work;
774119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
775119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
776119a934aSSatish Balay         ncols = n*bs;
777119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
778119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
779119a934aSSatish Balay         v += n*bs2;
780119a934aSSatish Balay         x += bs;
781119a934aSSatish Balay         workt = work;
782119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
783119a934aSSatish Balay           zb = z + bs*(*idx++);
784*1a6a6d98SBarry Smith           for ( k=0; k<bs; k++ ) zb[k] = workt[k] ;
785119a934aSSatish Balay           workt += bs;
786119a934aSSatish Balay         }
787119a934aSSatish Balay       }
788119a934aSSatish Balay     }
789119a934aSSatish Balay   }
7900419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
7910419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
7922593348eSBarry Smith   return 0;
7932593348eSBarry Smith }
7942593348eSBarry Smith 
795de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
7962593348eSBarry Smith {
797b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7989df24120SSatish Balay   if (nz)  *nz  = a->bs2*a->nz;
799bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
800bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
8012593348eSBarry Smith   return 0;
8022593348eSBarry Smith }
8032593348eSBarry Smith 
80491d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
80591d316f6SSatish Balay {
80691d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
80791d316f6SSatish Balay 
80891d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
80991d316f6SSatish Balay 
81091d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
81191d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
812a7c10996SSatish Balay       (a->nz != b->nz)) {
81391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
81491d316f6SSatish Balay   }
81591d316f6SSatish Balay 
81691d316f6SSatish Balay   /* if the a->i are the same */
81791d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
81891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
81991d316f6SSatish Balay   }
82091d316f6SSatish Balay 
82191d316f6SSatish Balay   /* if a->j are the same */
82291d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
82391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
82491d316f6SSatish Balay   }
82591d316f6SSatish Balay 
82691d316f6SSatish Balay   /* if a->a are the same */
82791d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
82891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
82991d316f6SSatish Balay   }
83091d316f6SSatish Balay   *flg = PETSC_TRUE;
83191d316f6SSatish Balay   return 0;
83291d316f6SSatish Balay 
83391d316f6SSatish Balay }
83491d316f6SSatish Balay 
83591d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
83691d316f6SSatish Balay {
83791d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8387e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
83917e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
84017e48fc4SSatish Balay 
84117e48fc4SSatish Balay   bs   = a->bs;
84217e48fc4SSatish Balay   aa   = a->a;
84317e48fc4SSatish Balay   ai   = a->i;
84417e48fc4SSatish Balay   aj   = a->j;
84517e48fc4SSatish Balay   ambs = a->mbs;
8469df24120SSatish Balay   bs2  = a->bs2;
84791d316f6SSatish Balay 
84891d316f6SSatish Balay   VecSet(&zero,v);
84991d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
8509867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
85117e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
85217e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
85317e48fc4SSatish Balay       if (aj[j] == i) {
85417e48fc4SSatish Balay         row  = i*bs;
8557e67e3f9SSatish Balay         aa_j = aa+j*bs2;
8567e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
85791d316f6SSatish Balay         break;
85891d316f6SSatish Balay       }
85991d316f6SSatish Balay     }
86091d316f6SSatish Balay   }
86191d316f6SSatish Balay   return 0;
86291d316f6SSatish Balay }
86391d316f6SSatish Balay 
8649867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
8659867e207SSatish Balay {
8669867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8679867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
8687e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
8699867e207SSatish Balay 
8709867e207SSatish Balay   ai  = a->i;
8719867e207SSatish Balay   aj  = a->j;
8729867e207SSatish Balay   aa  = a->a;
8739867e207SSatish Balay   m   = a->m;
8749867e207SSatish Balay   n   = a->n;
8759867e207SSatish Balay   bs  = a->bs;
8769867e207SSatish Balay   mbs = a->mbs;
8779df24120SSatish Balay   bs2 = a->bs2;
8789867e207SSatish Balay   if (ll) {
8799867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
8809867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
8819867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
8829867e207SSatish Balay       M  = ai[i+1] - ai[i];
8839867e207SSatish Balay       li = l + i*bs;
8847e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
8859867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
8867e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
8879867e207SSatish Balay           (*v++) *= li[k%bs];
8889867e207SSatish Balay         }
8899867e207SSatish Balay       }
8909867e207SSatish Balay     }
8919867e207SSatish Balay   }
8929867e207SSatish Balay 
8939867e207SSatish Balay   if (rr) {
8949867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
8959867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
8969867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
8979867e207SSatish Balay       M  = ai[i+1] - ai[i];
8987e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
8999867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
9009867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
9019867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
9029867e207SSatish Balay           x = ri[k];
9039867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
9049867e207SSatish Balay         }
9059867e207SSatish Balay       }
9069867e207SSatish Balay     }
9079867e207SSatish Balay   }
9089867e207SSatish Balay   return 0;
9099867e207SSatish Balay }
9109867e207SSatish Balay 
9119867e207SSatish Balay 
912b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
913b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
914*1a6a6d98SBarry Smith 
915*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
916*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
917*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
918*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
919*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
920*1a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
921*1a6a6d98SBarry Smith 
9227fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
9237fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
9247fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
9257fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
9267fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
9277fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
9282593348eSBarry Smith 
929b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
9302593348eSBarry Smith {
931b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9322593348eSBarry Smith   Scalar      *v = a->a;
9332593348eSBarry Smith   double      sum = 0.0;
9349df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
9352593348eSBarry Smith 
9362593348eSBarry Smith   if (type == NORM_FROBENIUS) {
9370419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
9382593348eSBarry Smith #if defined(PETSC_COMPLEX)
9392593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
9402593348eSBarry Smith #else
9412593348eSBarry Smith       sum += (*v)*(*v); v++;
9422593348eSBarry Smith #endif
9432593348eSBarry Smith     }
9442593348eSBarry Smith     *norm = sqrt(sum);
9452593348eSBarry Smith   }
9462593348eSBarry Smith   else {
947b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
9482593348eSBarry Smith   }
9492593348eSBarry Smith   return 0;
9502593348eSBarry Smith }
9512593348eSBarry Smith 
9522593348eSBarry Smith /*
9532593348eSBarry Smith      note: This can only work for identity for row and col. It would
9542593348eSBarry Smith    be good to check this and otherwise generate an error.
9552593348eSBarry Smith */
956b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
9572593348eSBarry Smith {
958b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
9592593348eSBarry Smith   Mat         outA;
960de6a44a3SBarry Smith   int         ierr;
9612593348eSBarry Smith 
962b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
9632593348eSBarry Smith 
9642593348eSBarry Smith   outA          = inA;
9652593348eSBarry Smith   inA->factor   = FACTOR_LU;
9662593348eSBarry Smith   a->row        = row;
9672593348eSBarry Smith   a->col        = col;
9682593348eSBarry Smith 
969de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
9702593348eSBarry Smith 
9712593348eSBarry Smith   if (!a->diag) {
972b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
9732593348eSBarry Smith   }
9747fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
9752593348eSBarry Smith   return 0;
9762593348eSBarry Smith }
9772593348eSBarry Smith 
978b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
9792593348eSBarry Smith {
980b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
9819df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
982b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
983b6490206SBarry Smith   PLogFlops(totalnz);
9842593348eSBarry Smith   return 0;
9852593348eSBarry Smith }
9862593348eSBarry Smith 
9877e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
9887e67e3f9SSatish Balay {
9897e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9907e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
991a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
9929df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
9937e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
9947e67e3f9SSatish Balay 
9957e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
9967e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
9977e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
9987e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
999a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
10007e67e3f9SSatish Balay     nrow = ailen[brow];
10017e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
10027e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
10037e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1004a7c10996SSatish Balay       col  = in[l] ;
10057e67e3f9SSatish Balay       bcol = col/bs;
10067e67e3f9SSatish Balay       cidx = col%bs;
10077e67e3f9SSatish Balay       ridx = row%bs;
10087e67e3f9SSatish Balay       high = nrow;
10097e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
10107e67e3f9SSatish Balay       while (high-low > 5) {
10117e67e3f9SSatish Balay         t = (low+high)/2;
10127e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
10137e67e3f9SSatish Balay         else             low  = t;
10147e67e3f9SSatish Balay       }
10157e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
10167e67e3f9SSatish Balay         if (rp[i] > bcol) break;
10177e67e3f9SSatish Balay         if (rp[i] == bcol) {
10187e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
10197e67e3f9SSatish Balay           goto finished;
10207e67e3f9SSatish Balay         }
10217e67e3f9SSatish Balay       }
10227e67e3f9SSatish Balay       *v++ = zero;
10237e67e3f9SSatish Balay       finished:;
10247e67e3f9SSatish Balay     }
10257e67e3f9SSatish Balay   }
10267e67e3f9SSatish Balay   return 0;
10277e67e3f9SSatish Balay }
10287e67e3f9SSatish Balay 
10292593348eSBarry Smith /* -------------------------------------------------------------------*/
1030cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
10319867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1032*1a6a6d98SBarry Smith        MatMult_SeqBAIJ,0,
1033*1a6a6d98SBarry Smith        MatMultTrans_SeqBAIJ,0,
1034*1a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1035b6490206SBarry Smith        0,0,
1036de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1037b6490206SBarry Smith        0,
1038f2501298SSatish Balay        MatTranspose_SeqBAIJ,
103917e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
10409867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1041584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1042b6490206SBarry Smith        0,
1043b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1044b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
10457fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1046b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1047de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1048b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1049b6490206SBarry Smith        0,0,
1050b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1051b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1052b6490206SBarry Smith        0,0,
10537e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1054*1a6a6d98SBarry Smith        0,MatScale_SeqBAIJ,
1055b6490206SBarry Smith        0};
10562593348eSBarry Smith 
10572593348eSBarry Smith /*@C
105844cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
105944cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
106044cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
10612593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
10622593348eSBarry Smith    increased by more than a factor of 50.
10632593348eSBarry Smith 
10642593348eSBarry Smith    Input Parameters:
10652593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1066b6490206SBarry Smith .  bs - size of block
10672593348eSBarry Smith .  m - number of rows
10682593348eSBarry Smith .  n - number of columns
1069b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
1070b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
1071b6490206SBarry Smith          (possibly different for each row)
10722593348eSBarry Smith 
10732593348eSBarry Smith    Output Parameter:
10742593348eSBarry Smith .  A - the matrix
10752593348eSBarry Smith 
10762593348eSBarry Smith    Notes:
107744cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
10782593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
107944cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
10802593348eSBarry Smith 
10812593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
10822593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
10832593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
10842593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
10852593348eSBarry Smith 
108644cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
10872593348eSBarry Smith @*/
1088b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
10892593348eSBarry Smith {
10902593348eSBarry Smith   Mat         B;
1091b6490206SBarry Smith   Mat_SeqBAIJ *b;
1092f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1093b6490206SBarry Smith 
1094f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1095f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
10962593348eSBarry Smith 
10972593348eSBarry Smith   *A = 0;
1098b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
10992593348eSBarry Smith   PLogObjectCreate(B);
1100b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
110144cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
11022593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1103*1a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1104*1a6a6d98SBarry Smith   if (!flg) {
11057fc0212eSBarry Smith     switch (bs) {
11067fc0212eSBarry Smith       case 1:
11077fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1108*1a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
11097fc0212eSBarry Smith         break;
11104eeb42bcSBarry Smith       case 2:
11114eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1112*1a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
11136d84be18SBarry Smith         break;
1114f327dd97SBarry Smith       case 3:
1115f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1116*1a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
11174eeb42bcSBarry Smith         break;
11186d84be18SBarry Smith       case 4:
11196d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1120*1a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
11216d84be18SBarry Smith         break;
11226d84be18SBarry Smith       case 5:
11236d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1124*1a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
11256d84be18SBarry Smith         break;
11267fc0212eSBarry Smith     }
1127*1a6a6d98SBarry Smith   }
1128b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1129b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
11302593348eSBarry Smith   B->factor           = 0;
11312593348eSBarry Smith   B->lupivotthreshold = 1.0;
11322593348eSBarry Smith   b->row              = 0;
11332593348eSBarry Smith   b->col              = 0;
11342593348eSBarry Smith   b->reallocs         = 0;
11352593348eSBarry Smith 
113644cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
113744cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1138b6490206SBarry Smith   b->mbs     = mbs;
1139f2501298SSatish Balay   b->nbs     = nbs;
1140b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
11412593348eSBarry Smith   if (nnz == PETSC_NULL) {
1142119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
11432593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1144b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1145b6490206SBarry Smith     nz = nz*mbs;
11462593348eSBarry Smith   }
11472593348eSBarry Smith   else {
11482593348eSBarry Smith     nz = 0;
1149b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
11502593348eSBarry Smith   }
11512593348eSBarry Smith 
11522593348eSBarry Smith   /* allocate the matrix space */
11537e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
11542593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
11557e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
11567e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
11572593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
11582593348eSBarry Smith   b->i  = b->j + nz;
11592593348eSBarry Smith   b->singlemalloc = 1;
11602593348eSBarry Smith 
1161de6a44a3SBarry Smith   b->i[0] = 0;
1162b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
11632593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
11642593348eSBarry Smith   }
11652593348eSBarry Smith 
1166b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1167b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1168b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1169b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
11702593348eSBarry Smith 
1171b6490206SBarry Smith   b->bs               = bs;
11729df24120SSatish Balay   b->bs2              = bs2;
1173b6490206SBarry Smith   b->mbs              = mbs;
11742593348eSBarry Smith   b->nz               = 0;
11752593348eSBarry Smith   b->maxnz            = nz;
11762593348eSBarry Smith   b->sorted           = 0;
11772593348eSBarry Smith   b->roworiented      = 1;
11782593348eSBarry Smith   b->nonew            = 0;
11792593348eSBarry Smith   b->diag             = 0;
11802593348eSBarry Smith   b->solve_work       = 0;
1181de6a44a3SBarry Smith   b->mult_work        = 0;
11822593348eSBarry Smith   b->spptr            = 0;
11832593348eSBarry Smith   *A = B;
11842593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
11852593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
11862593348eSBarry Smith   return 0;
11872593348eSBarry Smith }
11882593348eSBarry Smith 
1189b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
11902593348eSBarry Smith {
11912593348eSBarry Smith   Mat         C;
1192b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
11939df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1194de6a44a3SBarry Smith 
1195de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
11962593348eSBarry Smith 
11972593348eSBarry Smith   *B = 0;
1198b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
11992593348eSBarry Smith   PLogObjectCreate(C);
1200b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
12012593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1202b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1203b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
12042593348eSBarry Smith   C->factor     = A->factor;
12052593348eSBarry Smith   c->row        = 0;
12062593348eSBarry Smith   c->col        = 0;
12072593348eSBarry Smith   C->assembled  = PETSC_TRUE;
12082593348eSBarry Smith 
120944cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
121044cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
121144cd7ae7SLois Curfman McInnes   C->M          = a->m;
121244cd7ae7SLois Curfman McInnes   C->N          = a->n;
121344cd7ae7SLois Curfman McInnes 
1214b6490206SBarry Smith   c->bs         = a->bs;
12159df24120SSatish Balay   c->bs2        = a->bs2;
1216b6490206SBarry Smith   c->mbs        = a->mbs;
1217b6490206SBarry Smith   c->nbs        = a->nbs;
12182593348eSBarry Smith 
1219b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1220b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1221b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
12222593348eSBarry Smith     c->imax[i] = a->imax[i];
12232593348eSBarry Smith     c->ilen[i] = a->ilen[i];
12242593348eSBarry Smith   }
12252593348eSBarry Smith 
12262593348eSBarry Smith   /* allocate the matrix space */
12272593348eSBarry Smith   c->singlemalloc = 1;
12287e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
12292593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
12307e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1231de6a44a3SBarry Smith   c->i  = c->j + nz;
1232b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1233b6490206SBarry Smith   if (mbs > 0) {
1234de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
12352593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
12367e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
12372593348eSBarry Smith     }
12382593348eSBarry Smith   }
12392593348eSBarry Smith 
1240b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
12412593348eSBarry Smith   c->sorted      = a->sorted;
12422593348eSBarry Smith   c->roworiented = a->roworiented;
12432593348eSBarry Smith   c->nonew       = a->nonew;
12442593348eSBarry Smith 
12452593348eSBarry Smith   if (a->diag) {
1246b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1247b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1248b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
12492593348eSBarry Smith       c->diag[i] = a->diag[i];
12502593348eSBarry Smith     }
12512593348eSBarry Smith   }
12522593348eSBarry Smith   else c->diag          = 0;
12532593348eSBarry Smith   c->nz                 = a->nz;
12542593348eSBarry Smith   c->maxnz              = a->maxnz;
12552593348eSBarry Smith   c->solve_work         = 0;
12562593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
12577fc0212eSBarry Smith   c->mult_work          = 0;
12582593348eSBarry Smith   *B = C;
12592593348eSBarry Smith   return 0;
12602593348eSBarry Smith }
12612593348eSBarry Smith 
126219bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
12632593348eSBarry Smith {
1264b6490206SBarry Smith   Mat_SeqBAIJ  *a;
12652593348eSBarry Smith   Mat          B;
1266de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1267b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
126835aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1269a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1270b6490206SBarry Smith   Scalar       *aa;
127119bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
12722593348eSBarry Smith 
1273*1a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1274de6a44a3SBarry Smith   bs2  = bs*bs;
1275b6490206SBarry Smith 
12762593348eSBarry Smith   MPI_Comm_size(comm,&size);
1277b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
127890ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
127977c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1280de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
12812593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
12822593348eSBarry Smith 
1283b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
128435aab85fSBarry Smith 
128535aab85fSBarry Smith   /*
128635aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
128735aab85fSBarry Smith     divisible by the blocksize
128835aab85fSBarry Smith   */
1289b6490206SBarry Smith   mbs        = M/bs;
129035aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
129135aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
129235aab85fSBarry Smith   else                  mbs++;
129335aab85fSBarry Smith   if (extra_rows) {
1294*1a6a6d98SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
129535aab85fSBarry Smith   }
1296b6490206SBarry Smith 
12972593348eSBarry Smith   /* read in row lengths */
129835aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
129977c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
130035aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
13012593348eSBarry Smith 
1302b6490206SBarry Smith   /* read in column indices */
130335aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
130477c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
130535aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1306b6490206SBarry Smith 
1307b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1308b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1309b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
131035aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
131135aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
131235aab85fSBarry Smith   masked = mask + mbs;
1313b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1314b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
131535aab85fSBarry Smith     nmask = 0;
1316b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1317b6490206SBarry Smith       kmax = rowlengths[rowcount];
1318b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
131935aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
132035aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1321b6490206SBarry Smith       }
1322b6490206SBarry Smith       rowcount++;
1323b6490206SBarry Smith     }
132435aab85fSBarry Smith     browlengths[i] += nmask;
132535aab85fSBarry Smith     /* zero out the mask elements we set */
132635aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1327b6490206SBarry Smith   }
1328b6490206SBarry Smith 
13292593348eSBarry Smith   /* create our matrix */
133035aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
133135aab85fSBarry Smith          CHKERRQ(ierr);
13322593348eSBarry Smith   B = *A;
1333b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
13342593348eSBarry Smith 
13352593348eSBarry Smith   /* set matrix "i" values */
1336de6a44a3SBarry Smith   a->i[0] = 0;
1337b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1338b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1339b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
13402593348eSBarry Smith   }
1341b6490206SBarry Smith   a->nz         = 0;
1342b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
13432593348eSBarry Smith 
1344b6490206SBarry Smith   /* read in nonzero values */
134535aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
134677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
134735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1348b6490206SBarry Smith 
1349b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1350b6490206SBarry Smith   nzcount = 0; jcount = 0;
1351b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1352b6490206SBarry Smith     nzcountb = nzcount;
135335aab85fSBarry Smith     nmask    = 0;
1354b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1355b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1356b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
135735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
135835aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1359b6490206SBarry Smith       }
1360b6490206SBarry Smith       rowcount++;
1361b6490206SBarry Smith     }
1362de6a44a3SBarry Smith     /* sort the masked values */
136377c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1364de6a44a3SBarry Smith 
1365b6490206SBarry Smith     /* set "j" values into matrix */
1366b6490206SBarry Smith     maskcount = 1;
136735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
136835aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1369de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1370b6490206SBarry Smith     }
1371b6490206SBarry Smith     /* set "a" values into matrix */
1372de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1373b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1374b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1375b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1376de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1377de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1378de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1379de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1380b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1381b6490206SBarry Smith       }
1382b6490206SBarry Smith     }
138335aab85fSBarry Smith     /* zero out the mask elements we set */
138435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1385b6490206SBarry Smith   }
138635aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1387b6490206SBarry Smith 
1388b6490206SBarry Smith   PetscFree(rowlengths);
1389b6490206SBarry Smith   PetscFree(browlengths);
1390b6490206SBarry Smith   PetscFree(aa);
1391b6490206SBarry Smith   PetscFree(jj);
1392b6490206SBarry Smith   PetscFree(mask);
1393b6490206SBarry Smith 
1394b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1395de6a44a3SBarry Smith 
1396de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1397de6a44a3SBarry Smith   if (flg) {
139819bcc07fSBarry Smith     Viewer tviewer;
139919bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
140090ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
140119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
140219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1403de6a44a3SBarry Smith   }
1404de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1405de6a44a3SBarry Smith   if (flg) {
140619bcc07fSBarry Smith     Viewer tviewer;
140719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
140890ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
140919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
141019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1411de6a44a3SBarry Smith   }
1412de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1413de6a44a3SBarry Smith   if (flg) {
141419bcc07fSBarry Smith     Viewer tviewer;
141519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
141619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
141719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1418de6a44a3SBarry Smith   }
1419de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1420de6a44a3SBarry Smith   if (flg) {
142119bcc07fSBarry Smith     Viewer tviewer;
142219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
142390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
142419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
142519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1426de6a44a3SBarry Smith   }
1427de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1428de6a44a3SBarry Smith   if (flg) {
142919bcc07fSBarry Smith     Viewer tviewer;
143019bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
143119bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
143219bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
143319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1434de6a44a3SBarry Smith   }
14352593348eSBarry Smith   return 0;
14362593348eSBarry Smith }
14372593348eSBarry Smith 
14382593348eSBarry Smith 
14392593348eSBarry Smith 
1440