xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 39b95ed191f95c36f0dab8969f3a8ca8da8a1f2c)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*39b95ed1SBarry Smith static char vcid[] = "$Id: baij.c,v 1.48 1996/05/17 21:49:04 curfman Exp bsmith $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
111a6a6d98SBarry Smith #include "src/vec/vecimpl.h"
121a6a6d98SBarry Smith #include "src/inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
143b547af2SSatish Balay 
15bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41a7c10996SSatish Balay   ishift = 0;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
441a6a6d98SBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr);
451a6a6d98SBarry Smith     ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
462593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
47bcd2baecSBarry Smith   } else {
48bcd2baecSBarry Smith     if (ishift == oshift) {
491a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
50bcd2baecSBarry Smith     }
51bcd2baecSBarry Smith     else if (ishift == -1) {
52bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
531a6a6d98SBarry Smith       int nz = a->i[n] - 1;
54bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
551a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
561a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
57bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
581a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
59bcd2baecSBarry Smith     } else {
60bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
611a6a6d98SBarry Smith       int nz = a->i[n] - 1;
62bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
631a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]++;
641a6a6d98SBarry Smith       ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
65bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
661a6a6d98SBarry Smith       for ( i=0; i<n+1; i++ ) a->i[i]--;
67bcd2baecSBarry Smith     }
68bcd2baecSBarry Smith   }
692593348eSBarry Smith   return 0;
702593348eSBarry Smith }
712593348eSBarry Smith 
72de6a44a3SBarry Smith /*
73de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
74de6a44a3SBarry Smith */
75de6a44a3SBarry Smith 
76de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
77de6a44a3SBarry Smith {
78de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
797fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
80de6a44a3SBarry Smith 
81de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
82de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
837fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
84de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
85de6a44a3SBarry Smith       if (a->j[j] == i) {
86de6a44a3SBarry Smith         diag[i] = j;
87de6a44a3SBarry Smith         break;
88de6a44a3SBarry Smith       }
89de6a44a3SBarry Smith     }
90de6a44a3SBarry Smith   }
91de6a44a3SBarry Smith   a->diag = diag;
92de6a44a3SBarry Smith   return 0;
93de6a44a3SBarry Smith }
942593348eSBarry Smith 
952593348eSBarry Smith #include "draw.h"
962593348eSBarry Smith #include "pinclude/pviewer.h"
9777c4ece6SBarry Smith #include "sys.h"
982593348eSBarry Smith 
99b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1002593348eSBarry Smith {
101b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1029df24120SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
103b6490206SBarry Smith   Scalar      *aa;
1042593348eSBarry Smith 
10590ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1062593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1072593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1082593348eSBarry Smith   col_lens[1] = a->m;
1092593348eSBarry Smith   col_lens[2] = a->n;
1107e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1112593348eSBarry Smith 
1122593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
113b6490206SBarry Smith   count = 0;
114b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
115b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
116b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
117b6490206SBarry Smith     }
1182593348eSBarry Smith   }
11977c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1202593348eSBarry Smith   PetscFree(col_lens);
1212593348eSBarry Smith 
1222593348eSBarry Smith   /* store column indices (zero start index) */
1237e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
124b6490206SBarry Smith   count = 0;
125b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
126b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
127b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
128b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
129b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1302593348eSBarry Smith         }
1312593348eSBarry Smith       }
132b6490206SBarry Smith     }
133b6490206SBarry Smith   }
1347e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
135b6490206SBarry Smith   PetscFree(jj);
1362593348eSBarry Smith 
1372593348eSBarry Smith   /* store nonzero values */
1387e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
139b6490206SBarry Smith   count = 0;
140b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
141b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
142b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
143b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1447e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
145b6490206SBarry Smith         }
146b6490206SBarry Smith       }
147b6490206SBarry Smith     }
148b6490206SBarry Smith   }
1497e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
150b6490206SBarry Smith   PetscFree(aa);
1512593348eSBarry Smith   return 0;
1522593348eSBarry Smith }
1532593348eSBarry Smith 
154b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1552593348eSBarry Smith {
156b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1579df24120SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
1582593348eSBarry Smith   FILE        *fd;
1592593348eSBarry Smith   char        *outputname;
1602593348eSBarry Smith 
16190ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1622593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16390ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1647ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1657ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1662593348eSBarry Smith   }
16790ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
168b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1692593348eSBarry Smith   }
17044cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17144cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17244cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17344cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17444cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17544cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17644cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17744cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17844cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
17944cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18044cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18144cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18244cd7ae7SLois Curfman McInnes #else
18344cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18444cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18544cd7ae7SLois Curfman McInnes #endif
18644cd7ae7SLois Curfman McInnes           }
18744cd7ae7SLois Curfman McInnes         }
18844cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
18944cd7ae7SLois Curfman McInnes       }
19044cd7ae7SLois Curfman McInnes     }
19144cd7ae7SLois Curfman McInnes   }
1922593348eSBarry Smith   else {
193b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
194b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
195b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
196b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
197b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19888685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1997e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20088685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2017e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20288685aaeSLois Curfman McInnes           }
20388685aaeSLois Curfman McInnes           else {
2047e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20588685aaeSLois Curfman McInnes           }
20688685aaeSLois Curfman McInnes #else
2077e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20888685aaeSLois Curfman McInnes #endif
2092593348eSBarry Smith           }
2102593348eSBarry Smith         }
2112593348eSBarry Smith         fprintf(fd,"\n");
2122593348eSBarry Smith       }
2132593348eSBarry Smith     }
214b6490206SBarry Smith   }
2152593348eSBarry Smith   fflush(fd);
2162593348eSBarry Smith   return 0;
2172593348eSBarry Smith }
2182593348eSBarry Smith 
219b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
2202593348eSBarry Smith {
2212593348eSBarry Smith   Mat         A = (Mat) obj;
22219bcc07fSBarry Smith   ViewerType  vtype;
22319bcc07fSBarry Smith   int         ierr;
2242593348eSBarry Smith 
2252593348eSBarry Smith   if (!viewer) {
22619bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2272593348eSBarry Smith   }
22819bcc07fSBarry Smith 
22919bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
23019bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
231b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2322593348eSBarry Smith   }
23319bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
234b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2352593348eSBarry Smith   }
23619bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
237b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2382593348eSBarry Smith   }
23919bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
240b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2412593348eSBarry Smith   }
2422593348eSBarry Smith   return 0;
2432593348eSBarry Smith }
244b6490206SBarry Smith 
245119a934aSSatish Balay #define CHUNKSIZE  10
246cd0e1443SSatish Balay 
247cd0e1443SSatish Balay /* This version has row oriented v  */
248cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
249cd0e1443SSatish Balay {
250cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
251cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
252cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
253a7c10996SSatish Balay   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
2549df24120SSatish Balay   int          ridx,cidx,bs2=a->bs2;
255cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
256cd0e1443SSatish Balay 
257cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
258cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
259cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
260cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
261a7c10996SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
262cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
263cd0e1443SSatish Balay     low = 0;
264cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
265cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
266cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
267a7c10996SSatish Balay       col = in[l]; bcol = col/bs;
268cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
269cd0e1443SSatish Balay       if (roworiented) {
270cd0e1443SSatish Balay         value = *v++;
271cd0e1443SSatish Balay       }
272cd0e1443SSatish Balay       else {
273cd0e1443SSatish Balay         value = v[k + l*m];
274cd0e1443SSatish Balay       }
275cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
276cd0e1443SSatish Balay       while (high-low > 5) {
277cd0e1443SSatish Balay         t = (low+high)/2;
278cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
279cd0e1443SSatish Balay         else             low  = t;
280cd0e1443SSatish Balay       }
281cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
282cd0e1443SSatish Balay         if (rp[i] > bcol) break;
283cd0e1443SSatish Balay         if (rp[i] == bcol) {
2847e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
285cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
286cd0e1443SSatish Balay           else                  *bap  = value;
287cd0e1443SSatish Balay           goto noinsert;
288cd0e1443SSatish Balay         }
289cd0e1443SSatish Balay       }
290cd0e1443SSatish Balay       if (nonew) goto noinsert;
291cd0e1443SSatish Balay       if (nrow >= rmax) {
292cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
293cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
294cd0e1443SSatish Balay         Scalar *new_a;
295cd0e1443SSatish Balay 
296cd0e1443SSatish Balay         /* malloc new storage space */
2977e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
298cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
2997e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
300cd0e1443SSatish Balay         new_i   = new_j + new_nz;
301cd0e1443SSatish Balay 
302cd0e1443SSatish Balay         /* copy over old data into new slots */
303cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
3047e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
305a7c10996SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
306a7c10996SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
307a7c10996SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
308cd0e1443SSatish Balay                                                            len*sizeof(int));
309a7c10996SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
310a7c10996SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
311a7c10996SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
312a7c10996SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
313cd0e1443SSatish Balay         /* free up old matrix storage */
314cd0e1443SSatish Balay         PetscFree(a->a);
315cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
316cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
317cd0e1443SSatish Balay         a->singlemalloc = 1;
318cd0e1443SSatish Balay 
319a7c10996SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
320cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3217e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
32218e7b8e4SLois Curfman McInnes         a->maxnz += bs2*CHUNKSIZE;
323cd0e1443SSatish Balay         a->reallocs++;
324119a934aSSatish Balay         a->nz++;
325cd0e1443SSatish Balay       }
3267e67e3f9SSatish Balay       N = nrow++ - 1;
327cd0e1443SSatish Balay       /* shift up all the later entries in this row */
328cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
329cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3307e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
331cd0e1443SSatish Balay       }
3327e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
333cd0e1443SSatish Balay       rp[i] = bcol;
3347e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
335cd0e1443SSatish Balay       noinsert:;
336cd0e1443SSatish Balay       low = i;
337cd0e1443SSatish Balay     }
338cd0e1443SSatish Balay     ailen[brow] = nrow;
339cd0e1443SSatish Balay   }
340cd0e1443SSatish Balay   return 0;
341cd0e1443SSatish Balay }
342cd0e1443SSatish Balay 
3430b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3440b824a48SSatish Balay {
3450b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3460b824a48SSatish Balay   *m = a->m; *n = a->n;
3470b824a48SSatish Balay   return 0;
3480b824a48SSatish Balay }
3490b824a48SSatish Balay 
3500b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3510b824a48SSatish Balay {
3520b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3530b824a48SSatish Balay   *m = 0; *n = a->m;
3540b824a48SSatish Balay   return 0;
3550b824a48SSatish Balay }
3560b824a48SSatish Balay 
3579867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3589867e207SSatish Balay {
3599867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3607e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3619867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3629867e207SSatish Balay 
3639867e207SSatish Balay   bs  = a->bs;
3649867e207SSatish Balay   ai  = a->i;
3659867e207SSatish Balay   aj  = a->j;
3669867e207SSatish Balay   aa  = a->a;
3679df24120SSatish Balay   bs2 = a->bs2;
3689867e207SSatish Balay 
3699867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3709867e207SSatish Balay 
3719867e207SSatish Balay   bn  = row/bs;   /* Block number */
3729867e207SSatish Balay   bp  = row % bs; /* Block Position */
3739867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3749867e207SSatish Balay   *nz = bs*M;
3759867e207SSatish Balay 
3769867e207SSatish Balay   if (v) {
3779867e207SSatish Balay     *v = 0;
3789867e207SSatish Balay     if (*nz) {
3799867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3809867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3819867e207SSatish Balay         v_i  = *v + i*bs;
3827e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3837e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3849867e207SSatish Balay       }
3859867e207SSatish Balay     }
3869867e207SSatish Balay   }
3879867e207SSatish Balay 
3889867e207SSatish Balay   if (idx) {
3899867e207SSatish Balay     *idx = 0;
3909867e207SSatish Balay     if (*nz) {
3919867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3929867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3939867e207SSatish Balay         idx_i = *idx + i*bs;
3949867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3959867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3969867e207SSatish Balay       }
3979867e207SSatish Balay     }
3989867e207SSatish Balay   }
3999867e207SSatish Balay   return 0;
4009867e207SSatish Balay }
4019867e207SSatish Balay 
4029867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4039867e207SSatish Balay {
4049867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
4059867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
4069867e207SSatish Balay   return 0;
4079867e207SSatish Balay }
408b6490206SBarry Smith 
409f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
410f2501298SSatish Balay {
411f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
412f2501298SSatish Balay   Mat         C;
413f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
4149df24120SSatish Balay   int         *rows,*cols,bs2=a->bs2;
415f2501298SSatish Balay   Scalar      *array=a->a;
416f2501298SSatish Balay 
417f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
418f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
419f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
420f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
421a7c10996SSatish Balay 
422a7c10996SSatish Balay   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
423f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
424f2501298SSatish Balay   PetscFree(col);
425f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
426f2501298SSatish Balay   cols = rows + bs;
427f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
428f2501298SSatish Balay     cols[0] = i*bs;
429f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
430f2501298SSatish Balay     len = ai[i+1] - ai[i];
431f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
432f2501298SSatish Balay       rows[0] = (*aj++)*bs;
433f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
434f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
435f2501298SSatish Balay       array += bs2;
436f2501298SSatish Balay     }
437f2501298SSatish Balay   }
4381073c447SSatish Balay   PetscFree(rows);
439f2501298SSatish Balay 
440f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
441f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
442f2501298SSatish Balay 
443f2501298SSatish Balay   if (B != PETSC_NULL) {
444f2501298SSatish Balay     *B = C;
445f2501298SSatish Balay   } else {
446f2501298SSatish Balay     /* This isn't really an in-place transpose */
447f2501298SSatish Balay     PetscFree(a->a);
448f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
449f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
450f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
451f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
452f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
453f2501298SSatish Balay     PetscFree(a);
454f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
455f2501298SSatish Balay     PetscHeaderDestroy(C);
456f2501298SSatish Balay   }
457f2501298SSatish Balay   return 0;
458f2501298SSatish Balay }
459f2501298SSatish Balay 
460f2501298SSatish Balay 
461584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
462584200bdSSatish Balay {
463584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
464584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
465a7c10996SSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen;
4669df24120SSatish Balay   int        mbs = a->mbs, bs2 = a->bs2;
467584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
468584200bdSSatish Balay 
469584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
470584200bdSSatish Balay 
471584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
472584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
473584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
474584200bdSSatish Balay     if (fshift) {
475a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
476584200bdSSatish Balay       N = ailen[i];
477584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
478584200bdSSatish Balay         ip[j-fshift] = ip[j];
4797e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
480584200bdSSatish Balay       }
481584200bdSSatish Balay     }
482584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
483584200bdSSatish Balay   }
484584200bdSSatish Balay   if (mbs) {
485584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
486584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
487584200bdSSatish Balay   }
488584200bdSSatish Balay   /* reset ilen and imax for each row */
489584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
490584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
491584200bdSSatish Balay   }
492a7c10996SSatish Balay   a->nz = ai[mbs];
493584200bdSSatish Balay 
494584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
495584200bdSSatish Balay   if (fshift && a->diag) {
496584200bdSSatish Balay     PetscFree(a->diag);
497584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
498584200bdSSatish Balay     a->diag = 0;
499584200bdSSatish Balay   }
50067790700SSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs);
501584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n",
502584200bdSSatish Balay            a->reallocs);
503584200bdSSatish Balay   return 0;
504584200bdSSatish Balay }
505584200bdSSatish Balay 
506b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
5072593348eSBarry Smith {
508b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5099df24120SSatish Balay   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
5102593348eSBarry Smith   return 0;
5112593348eSBarry Smith }
5122593348eSBarry Smith 
513b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
5142593348eSBarry Smith {
5152593348eSBarry Smith   Mat         A  = (Mat) obj;
516b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5172593348eSBarry Smith 
5182593348eSBarry Smith #if defined(PETSC_LOG)
5192593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5202593348eSBarry Smith #endif
5212593348eSBarry Smith   PetscFree(a->a);
5222593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5232593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5242593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5252593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5262593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
527de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5282593348eSBarry Smith   PetscFree(a);
529f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
530f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5312593348eSBarry Smith   return 0;
5322593348eSBarry Smith }
5332593348eSBarry Smith 
534b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5352593348eSBarry Smith {
536b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5372593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5382593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5392593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5402593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5412593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5422593348eSBarry Smith   else if (op == ROWS_SORTED ||
5432593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5442593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5452593348eSBarry Smith            op == YES_NEW_DIAGONALS)
54694a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5472593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
548b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5492593348eSBarry Smith   else
550b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5512593348eSBarry Smith   return 0;
5522593348eSBarry Smith }
5532593348eSBarry Smith 
5542593348eSBarry Smith 
5552593348eSBarry Smith /* -------------------------------------------------------*/
5562593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5572593348eSBarry Smith /* -------------------------------------------------------*/
558b6490206SBarry Smith #include "pinclude/plapack.h"
559b6490206SBarry Smith 
560*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
5612593348eSBarry Smith {
562b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
5631a6a6d98SBarry Smith   Scalar          *xg,*zg;
564*39b95ed1SBarry Smith   register Scalar *x,*z,*v,sum;
565*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
5662593348eSBarry Smith 
567bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
568bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
569b6490206SBarry Smith 
570119a934aSSatish Balay   idx   = a->j;
571119a934aSSatish Balay   v     = a->a;
572119a934aSSatish Balay   ii    = a->i;
573119a934aSSatish Balay 
574119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
575119a934aSSatish Balay     n    = ii[1] - ii[0]; ii++;
576119a934aSSatish Balay     sum  = 0.0;
577119a934aSSatish Balay     while (n--) sum += *v++ * x[*idx++];
5781a6a6d98SBarry Smith     z[i] = sum;
579119a934aSSatish Balay   }
580*39b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
581*39b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
582*39b95ed1SBarry Smith   PLogFlops(2*a->nz - a->m);
583*39b95ed1SBarry Smith   return 0;
584*39b95ed1SBarry Smith }
585*39b95ed1SBarry Smith 
586*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
587*39b95ed1SBarry Smith {
588*39b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
589*39b95ed1SBarry Smith   Scalar          *xg,*zg;
590*39b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2;
591*39b95ed1SBarry Smith   register Scalar x1,x2;
592*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
593*39b95ed1SBarry Smith   int             j,n,ierr;
594*39b95ed1SBarry Smith 
595*39b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
596*39b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
597*39b95ed1SBarry Smith 
598*39b95ed1SBarry Smith   idx   = a->j;
599*39b95ed1SBarry Smith   v     = a->a;
600*39b95ed1SBarry Smith   ii    = a->i;
601*39b95ed1SBarry Smith 
602119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
603119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
604119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0;
605119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
606119a934aSSatish Balay       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
607119a934aSSatish Balay       sum1 += v[0]*x1 + v[2]*x2;
608119a934aSSatish Balay       sum2 += v[1]*x1 + v[3]*x2;
609119a934aSSatish Balay       v += 4;
610119a934aSSatish Balay     }
6111a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2;
612119a934aSSatish Balay     z += 2;
613119a934aSSatish Balay   }
614*39b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
615*39b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
616*39b95ed1SBarry Smith   PLogFlops(4*a->nz - a->m);
617*39b95ed1SBarry Smith   return 0;
618*39b95ed1SBarry Smith }
619*39b95ed1SBarry Smith 
620*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
621*39b95ed1SBarry Smith {
622*39b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
623*39b95ed1SBarry Smith   Scalar          *xg,*zg;
624*39b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
625*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
626*39b95ed1SBarry Smith 
627*39b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
628*39b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
629*39b95ed1SBarry Smith 
630*39b95ed1SBarry Smith   idx   = a->j;
631*39b95ed1SBarry Smith   v     = a->a;
632*39b95ed1SBarry Smith   ii    = a->i;
633*39b95ed1SBarry Smith 
634119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
635119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
636119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
637119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
638119a934aSSatish Balay       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
639119a934aSSatish Balay       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
640119a934aSSatish Balay       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
641119a934aSSatish Balay       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
642119a934aSSatish Balay       v += 9;
643119a934aSSatish Balay     }
6441a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3;
645119a934aSSatish Balay     z += 3;
646119a934aSSatish Balay   }
647*39b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
648*39b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
649*39b95ed1SBarry Smith   PLogFlops(18*a->nz - a->m);
650*39b95ed1SBarry Smith   return 0;
651*39b95ed1SBarry Smith }
652*39b95ed1SBarry Smith 
653*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
654*39b95ed1SBarry Smith {
655*39b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
656*39b95ed1SBarry Smith   Scalar          *xg,*zg;
657*39b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
658*39b95ed1SBarry Smith   register Scalar x1,x2,x3,x4;
659*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii;
660*39b95ed1SBarry Smith   int             j,n,ierr;
661*39b95ed1SBarry Smith 
662*39b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
663*39b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
664*39b95ed1SBarry Smith 
665*39b95ed1SBarry Smith   idx   = a->j;
666*39b95ed1SBarry Smith   v     = a->a;
667*39b95ed1SBarry Smith   ii    = a->i;
668*39b95ed1SBarry Smith 
669119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
670119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
671119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
672119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
673119a934aSSatish Balay       xb = x + 4*(*idx++);
674119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
675119a934aSSatish Balay       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
676119a934aSSatish Balay       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
677119a934aSSatish Balay       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
678119a934aSSatish Balay       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
679119a934aSSatish Balay       v += 16;
680119a934aSSatish Balay     }
6811a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
682119a934aSSatish Balay     z += 4;
683119a934aSSatish Balay   }
684*39b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
685*39b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
686*39b95ed1SBarry Smith   PLogFlops(32*a->nz - a->m);
687*39b95ed1SBarry Smith   return 0;
688*39b95ed1SBarry Smith }
689*39b95ed1SBarry Smith 
690*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
691*39b95ed1SBarry Smith {
692*39b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
693*39b95ed1SBarry Smith   Scalar          *xg,*zg;
694*39b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
695*39b95ed1SBarry Smith   register Scalar x1,x2,x3,x4,x5;
696*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,j,n,ierr;
697*39b95ed1SBarry Smith 
698*39b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
699*39b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
700*39b95ed1SBarry Smith 
701*39b95ed1SBarry Smith   idx   = a->j;
702*39b95ed1SBarry Smith   v     = a->a;
703*39b95ed1SBarry Smith   ii    = a->i;
704*39b95ed1SBarry Smith 
705119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
706119a934aSSatish Balay     n  = ii[1] - ii[0]; ii++;
707119a934aSSatish Balay     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
708119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
709119a934aSSatish Balay       xb = x + 5*(*idx++);
710119a934aSSatish Balay       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
711119a934aSSatish Balay       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
712119a934aSSatish Balay       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
713119a934aSSatish Balay       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
714119a934aSSatish Balay       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
715119a934aSSatish Balay       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
716119a934aSSatish Balay       v += 25;
717119a934aSSatish Balay     }
7181a6a6d98SBarry Smith     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
719119a934aSSatish Balay     z += 5;
720119a934aSSatish Balay   }
721*39b95ed1SBarry Smith   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
722*39b95ed1SBarry Smith   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
723*39b95ed1SBarry Smith   PLogFlops(50*a->nz - a->m);
724*39b95ed1SBarry Smith   return 0;
725*39b95ed1SBarry Smith }
726*39b95ed1SBarry Smith 
727*39b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
728*39b95ed1SBarry Smith {
729*39b95ed1SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
730*39b95ed1SBarry Smith   Scalar          *xg,*zg;
731*39b95ed1SBarry Smith   register Scalar *x,*z,*v,*xb;
732*39b95ed1SBarry Smith   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
7331a6a6d98SBarry Smith   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0;
734*39b95ed1SBarry Smith 
735*39b95ed1SBarry Smith   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
736*39b95ed1SBarry Smith   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
737*39b95ed1SBarry Smith 
738*39b95ed1SBarry Smith   idx   = a->j;
739*39b95ed1SBarry Smith   v     = a->a;
740*39b95ed1SBarry Smith   ii    = a->i;
741*39b95ed1SBarry Smith 
742*39b95ed1SBarry Smith 
743119a934aSSatish Balay   if (!a->mult_work) {
7443b547af2SSatish Balay     k = PetscMax(a->m,a->n);
745*39b95ed1SBarry Smith     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
746119a934aSSatish Balay   }
747119a934aSSatish Balay   work = a->mult_work;
748119a934aSSatish Balay   for ( i=0; i<mbs; i++ ) {
749119a934aSSatish Balay     n     = ii[1] - ii[0]; ii++;
750119a934aSSatish Balay     ncols = n*bs;
751119a934aSSatish Balay     workt = work;
752119a934aSSatish Balay     for ( j=0; j<n; j++ ) {
753119a934aSSatish Balay       xb = x + bs*(*idx++);
754119a934aSSatish Balay       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
755119a934aSSatish Balay       workt += bs;
756119a934aSSatish Balay     }
7571a6a6d98SBarry Smith     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
758119a934aSSatish Balay     v += n*bs2;
759119a934aSSatish Balay     z += bs;
760119a934aSSatish Balay   }
7610419e6cdSSatish Balay    ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
7620419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
7631a6a6d98SBarry Smith   PLogFlops(2*a->nz*bs2 - a->m);
764bb42667fSSatish Balay   return 0;
765bb42667fSSatish Balay }
766bb42667fSSatish Balay 
7671a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
768bb42667fSSatish Balay {
769bb42667fSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
7701a6a6d98SBarry Smith   Scalar          *xg,*zg,*zb;
771bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
772bb1453f0SSatish Balay   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
7739df24120SSatish Balay   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
774119a934aSSatish Balay 
775119a934aSSatish Balay 
776bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
777bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
778bb1453f0SSatish Balay   PetscMemzero(z,N*sizeof(Scalar));
779bb42667fSSatish Balay 
780119a934aSSatish Balay   idx   = a->j;
781119a934aSSatish Balay   v     = a->a;
782119a934aSSatish Balay   ii    = a->i;
783119a934aSSatish Balay 
784119a934aSSatish Balay   switch (bs) {
785119a934aSSatish Balay   case 1:
786119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
787119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
788119a934aSSatish Balay       xb = x + i; x1 = xb[0];
789119a934aSSatish Balay       ib = idx + ai[i];
790119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
791bb1453f0SSatish Balay         rval    = ib[j];
792bb1453f0SSatish Balay         z[rval] += *v++ * x1;
793119a934aSSatish Balay       }
794119a934aSSatish Balay     }
795119a934aSSatish Balay     break;
796119a934aSSatish Balay   case 2:
797119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
798119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
799119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
800119a934aSSatish Balay       ib = idx + ai[i];
801119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
802119a934aSSatish Balay         rval      = ib[j]*2;
803bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
804bb1453f0SSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
805119a934aSSatish Balay         v += 4;
806119a934aSSatish Balay       }
807119a934aSSatish Balay     }
808119a934aSSatish Balay     break;
809119a934aSSatish Balay   case 3:
810119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
811119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
812119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
813119a934aSSatish Balay       ib = idx + ai[i];
814119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
815119a934aSSatish Balay         rval      = ib[j]*3;
816bb1453f0SSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
817bb1453f0SSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
818bb1453f0SSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
819119a934aSSatish Balay         v += 9;
820119a934aSSatish Balay       }
821119a934aSSatish Balay     }
822119a934aSSatish Balay     break;
823119a934aSSatish Balay   case 4:
824119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
825119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
826119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
827119a934aSSatish Balay       ib = idx + ai[i];
828119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
829119a934aSSatish Balay         rval      = ib[j]*4;
830bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
831bb1453f0SSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
832bb1453f0SSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
833bb1453f0SSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
834119a934aSSatish Balay         v += 16;
835119a934aSSatish Balay       }
836119a934aSSatish Balay     }
837119a934aSSatish Balay     break;
838119a934aSSatish Balay   case 5:
839119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
840119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
841119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
842119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
843119a934aSSatish Balay       ib = idx + ai[i];
844119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
845119a934aSSatish Balay         rval      = ib[j]*5;
846bb1453f0SSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
847bb1453f0SSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
848bb1453f0SSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
849bb1453f0SSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
850bb1453f0SSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
851119a934aSSatish Balay         v += 25;
852119a934aSSatish Balay       }
853119a934aSSatish Balay     }
854119a934aSSatish Balay     break;
855119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
856119a934aSSatish Balay     default: {
857119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
858119a934aSSatish Balay       if (!a->mult_work) {
8593b547af2SSatish Balay         k = PetscMax(a->m,a->n);
860bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
861119a934aSSatish Balay         CHKPTRQ(a->mult_work);
862119a934aSSatish Balay       }
863119a934aSSatish Balay       work = a->mult_work;
864119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
865119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
866119a934aSSatish Balay         ncols = n*bs;
867119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
868119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
869119a934aSSatish Balay         v += n*bs2;
870119a934aSSatish Balay         x += bs;
871119a934aSSatish Balay         workt = work;
872119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
873119a934aSSatish Balay           zb = z + bs*(*idx++);
874bb1453f0SSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
875119a934aSSatish Balay           workt += bs;
876119a934aSSatish Balay         }
877119a934aSSatish Balay       }
878119a934aSSatish Balay     }
879119a934aSSatish Balay   }
8800419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
8810419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
8822593348eSBarry Smith   return 0;
8832593348eSBarry Smith }
8842593348eSBarry Smith 
885de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
8862593348eSBarry Smith {
887b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8889df24120SSatish Balay   if (nz)  *nz  = a->bs2*a->nz;
889bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
890bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
8912593348eSBarry Smith   return 0;
8922593348eSBarry Smith }
8932593348eSBarry Smith 
89491d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
89591d316f6SSatish Balay {
89691d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
89791d316f6SSatish Balay 
89891d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
89991d316f6SSatish Balay 
90091d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
90191d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
902a7c10996SSatish Balay       (a->nz != b->nz)) {
90391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
90491d316f6SSatish Balay   }
90591d316f6SSatish Balay 
90691d316f6SSatish Balay   /* if the a->i are the same */
90791d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
90891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
90991d316f6SSatish Balay   }
91091d316f6SSatish Balay 
91191d316f6SSatish Balay   /* if a->j are the same */
91291d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
91391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
91491d316f6SSatish Balay   }
91591d316f6SSatish Balay 
91691d316f6SSatish Balay   /* if a->a are the same */
91791d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
91891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
91991d316f6SSatish Balay   }
92091d316f6SSatish Balay   *flg = PETSC_TRUE;
92191d316f6SSatish Balay   return 0;
92291d316f6SSatish Balay 
92391d316f6SSatish Balay }
92491d316f6SSatish Balay 
92591d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
92691d316f6SSatish Balay {
92791d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9287e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
92917e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
93017e48fc4SSatish Balay 
93117e48fc4SSatish Balay   bs   = a->bs;
93217e48fc4SSatish Balay   aa   = a->a;
93317e48fc4SSatish Balay   ai   = a->i;
93417e48fc4SSatish Balay   aj   = a->j;
93517e48fc4SSatish Balay   ambs = a->mbs;
9369df24120SSatish Balay   bs2  = a->bs2;
93791d316f6SSatish Balay 
93891d316f6SSatish Balay   VecSet(&zero,v);
93991d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
9409867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
94117e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
94217e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
94317e48fc4SSatish Balay       if (aj[j] == i) {
94417e48fc4SSatish Balay         row  = i*bs;
9457e67e3f9SSatish Balay         aa_j = aa+j*bs2;
9467e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
94791d316f6SSatish Balay         break;
94891d316f6SSatish Balay       }
94991d316f6SSatish Balay     }
95091d316f6SSatish Balay   }
95191d316f6SSatish Balay   return 0;
95291d316f6SSatish Balay }
95391d316f6SSatish Balay 
9549867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
9559867e207SSatish Balay {
9569867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9579867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
9587e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
9599867e207SSatish Balay 
9609867e207SSatish Balay   ai  = a->i;
9619867e207SSatish Balay   aj  = a->j;
9629867e207SSatish Balay   aa  = a->a;
9639867e207SSatish Balay   m   = a->m;
9649867e207SSatish Balay   n   = a->n;
9659867e207SSatish Balay   bs  = a->bs;
9669867e207SSatish Balay   mbs = a->mbs;
9679df24120SSatish Balay   bs2 = a->bs2;
9689867e207SSatish Balay   if (ll) {
9699867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
9709867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
9719867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
9729867e207SSatish Balay       M  = ai[i+1] - ai[i];
9739867e207SSatish Balay       li = l + i*bs;
9747e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
9759867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
9767e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
9779867e207SSatish Balay           (*v++) *= li[k%bs];
9789867e207SSatish Balay         }
9799867e207SSatish Balay       }
9809867e207SSatish Balay     }
9819867e207SSatish Balay   }
9829867e207SSatish Balay 
9839867e207SSatish Balay   if (rr) {
9849867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
9859867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
9869867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
9879867e207SSatish Balay       M  = ai[i+1] - ai[i];
9887e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
9899867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
9909867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
9919867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
9929867e207SSatish Balay           x = ri[k];
9939867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
9949867e207SSatish Balay         }
9959867e207SSatish Balay       }
9969867e207SSatish Balay     }
9979867e207SSatish Balay   }
9989867e207SSatish Balay   return 0;
9999867e207SSatish Balay }
10009867e207SSatish Balay 
10019867e207SSatish Balay 
1002b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1003b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
10042a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1005736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1006736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
10071a6a6d98SBarry Smith 
10081a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
10091a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
10101a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
10111a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
10121a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
10131a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
10141a6a6d98SBarry Smith 
10157fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
10167fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
10177fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
10187fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
10197fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
10207fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
10212593348eSBarry Smith 
1022b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
10232593348eSBarry Smith {
1024b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
10252593348eSBarry Smith   Scalar      *v = a->a;
10262593348eSBarry Smith   double      sum = 0.0;
10279df24120SSatish Balay   int         i,nz=a->nz,bs2=a->bs2;
10282593348eSBarry Smith 
10292593348eSBarry Smith   if (type == NORM_FROBENIUS) {
10300419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
10312593348eSBarry Smith #if defined(PETSC_COMPLEX)
10322593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
10332593348eSBarry Smith #else
10342593348eSBarry Smith       sum += (*v)*(*v); v++;
10352593348eSBarry Smith #endif
10362593348eSBarry Smith     }
10372593348eSBarry Smith     *norm = sqrt(sum);
10382593348eSBarry Smith   }
10392593348eSBarry Smith   else {
1040b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
10412593348eSBarry Smith   }
10422593348eSBarry Smith   return 0;
10432593348eSBarry Smith }
10442593348eSBarry Smith 
10452593348eSBarry Smith /*
10462593348eSBarry Smith      note: This can only work for identity for row and col. It would
10472593348eSBarry Smith    be good to check this and otherwise generate an error.
10482593348eSBarry Smith */
1049b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
10502593348eSBarry Smith {
1051b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
10522593348eSBarry Smith   Mat         outA;
1053de6a44a3SBarry Smith   int         ierr;
10542593348eSBarry Smith 
1055b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
10562593348eSBarry Smith 
10572593348eSBarry Smith   outA          = inA;
10582593348eSBarry Smith   inA->factor   = FACTOR_LU;
10592593348eSBarry Smith   a->row        = row;
10602593348eSBarry Smith   a->col        = col;
10612593348eSBarry Smith 
1062de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
10632593348eSBarry Smith 
10642593348eSBarry Smith   if (!a->diag) {
1065b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
10662593348eSBarry Smith   }
10677fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
10682593348eSBarry Smith   return 0;
10692593348eSBarry Smith }
10702593348eSBarry Smith 
1071b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
10722593348eSBarry Smith {
1073b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
10749df24120SSatish Balay   int         one = 1, totalnz = a->bs2*a->nz;
1075b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1076b6490206SBarry Smith   PLogFlops(totalnz);
10772593348eSBarry Smith   return 0;
10782593348eSBarry Smith }
10792593348eSBarry Smith 
10807e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
10817e67e3f9SSatish Balay {
10827e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
10837e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1084a7c10996SSatish Balay   int        *ai = a->i, *ailen = a->ilen;
10859df24120SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
10867e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
10877e67e3f9SSatish Balay 
10887e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
10897e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
10907e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
10917e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1092a7c10996SSatish Balay     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
10937e67e3f9SSatish Balay     nrow = ailen[brow];
10947e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
10957e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
10967e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1097a7c10996SSatish Balay       col  = in[l] ;
10987e67e3f9SSatish Balay       bcol = col/bs;
10997e67e3f9SSatish Balay       cidx = col%bs;
11007e67e3f9SSatish Balay       ridx = row%bs;
11017e67e3f9SSatish Balay       high = nrow;
11027e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
11037e67e3f9SSatish Balay       while (high-low > 5) {
11047e67e3f9SSatish Balay         t = (low+high)/2;
11057e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
11067e67e3f9SSatish Balay         else             low  = t;
11077e67e3f9SSatish Balay       }
11087e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
11097e67e3f9SSatish Balay         if (rp[i] > bcol) break;
11107e67e3f9SSatish Balay         if (rp[i] == bcol) {
11117e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
11127e67e3f9SSatish Balay           goto finished;
11137e67e3f9SSatish Balay         }
11147e67e3f9SSatish Balay       }
11157e67e3f9SSatish Balay       *v++ = zero;
11167e67e3f9SSatish Balay       finished:;
11177e67e3f9SSatish Balay     }
11187e67e3f9SSatish Balay   }
11197e67e3f9SSatish Balay   return 0;
11207e67e3f9SSatish Balay }
11217e67e3f9SSatish Balay 
11222593348eSBarry Smith /* -------------------------------------------------------------------*/
1123cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
11249867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1125*39b95ed1SBarry Smith        MatMult_SeqBAIJ_N,0,
11261a6a6d98SBarry Smith        MatMultTrans_SeqBAIJ,0,
11271a6a6d98SBarry Smith        MatSolve_SeqBAIJ_N,0,
1128b6490206SBarry Smith        0,0,
1129de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1130b6490206SBarry Smith        0,
1131f2501298SSatish Balay        MatTranspose_SeqBAIJ,
113217e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
11339867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1134584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1135b6490206SBarry Smith        0,
1136b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1137b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
11387fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1139b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1140de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1141b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1142736121d4SSatish Balay        MatGetSubMatrix_SeqBAIJ,0,
1143b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1144b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1145af015674SSatish Balay        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
11467e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
11471a6a6d98SBarry Smith        0,MatScale_SeqBAIJ,
1148b6490206SBarry Smith        0};
11492593348eSBarry Smith 
11502593348eSBarry Smith /*@C
115144cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
115244cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
115344cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
11542593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
11552593348eSBarry Smith    increased by more than a factor of 50.
11562593348eSBarry Smith 
11572593348eSBarry Smith    Input Parameters:
11582593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1159b6490206SBarry Smith .  bs - size of block
11602593348eSBarry Smith .  m - number of rows
11612593348eSBarry Smith .  n - number of columns
1162b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
1163b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
1164b6490206SBarry Smith          (possibly different for each row)
11652593348eSBarry Smith 
11662593348eSBarry Smith    Output Parameter:
11672593348eSBarry Smith .  A - the matrix
11682593348eSBarry Smith 
11692593348eSBarry Smith    Notes:
117044cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
11712593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
117244cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
11732593348eSBarry Smith 
11742593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
11752593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
11762593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
11772593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
11782593348eSBarry Smith 
117944cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
11802593348eSBarry Smith @*/
1181b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
11822593348eSBarry Smith {
11832593348eSBarry Smith   Mat         B;
1184b6490206SBarry Smith   Mat_SeqBAIJ *b;
1185f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1186b6490206SBarry Smith 
1187f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1188f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
11892593348eSBarry Smith 
11902593348eSBarry Smith   *A = 0;
1191b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
11922593348eSBarry Smith   PLogObjectCreate(B);
1193b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
119444cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
11952593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
11961a6a6d98SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
11971a6a6d98SBarry Smith   if (!flg) {
11987fc0212eSBarry Smith     switch (bs) {
11997fc0212eSBarry Smith       case 1:
12007fc0212eSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
12011a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_1;
1202*39b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_1;
12037fc0212eSBarry Smith        break;
12044eeb42bcSBarry Smith       case 2:
12054eeb42bcSBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
12061a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_2;
1207*39b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_2;
12086d84be18SBarry Smith         break;
1209f327dd97SBarry Smith       case 3:
1210f327dd97SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
12111a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_3;
1212*39b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_3;
12134eeb42bcSBarry Smith         break;
12146d84be18SBarry Smith       case 4:
12156d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
12161a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_4;
1217*39b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_4;
12186d84be18SBarry Smith         break;
12196d84be18SBarry Smith       case 5:
12206d84be18SBarry Smith         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
12211a6a6d98SBarry Smith         B->ops.solve           = MatSolve_SeqBAIJ_5;
1222*39b95ed1SBarry Smith         B->ops.mult            = MatMult_SeqBAIJ_5;
12236d84be18SBarry Smith         break;
12247fc0212eSBarry Smith     }
12251a6a6d98SBarry Smith   }
1226b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1227b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
12282593348eSBarry Smith   B->factor           = 0;
12292593348eSBarry Smith   B->lupivotthreshold = 1.0;
12302593348eSBarry Smith   b->row              = 0;
12312593348eSBarry Smith   b->col              = 0;
12322593348eSBarry Smith   b->reallocs         = 0;
12332593348eSBarry Smith 
123444cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
123544cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1236b6490206SBarry Smith   b->mbs     = mbs;
1237f2501298SSatish Balay   b->nbs     = nbs;
1238b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
12392593348eSBarry Smith   if (nnz == PETSC_NULL) {
1240119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
12412593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1242b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1243b6490206SBarry Smith     nz = nz*mbs;
12442593348eSBarry Smith   }
12452593348eSBarry Smith   else {
12462593348eSBarry Smith     nz = 0;
1247b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
12482593348eSBarry Smith   }
12492593348eSBarry Smith 
12502593348eSBarry Smith   /* allocate the matrix space */
12517e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
12522593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
12537e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
12547e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
12552593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
12562593348eSBarry Smith   b->i  = b->j + nz;
12572593348eSBarry Smith   b->singlemalloc = 1;
12582593348eSBarry Smith 
1259de6a44a3SBarry Smith   b->i[0] = 0;
1260b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
12612593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
12622593348eSBarry Smith   }
12632593348eSBarry Smith 
1264b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1265b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1266b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1267b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
12682593348eSBarry Smith 
1269b6490206SBarry Smith   b->bs               = bs;
12709df24120SSatish Balay   b->bs2              = bs2;
1271b6490206SBarry Smith   b->mbs              = mbs;
12722593348eSBarry Smith   b->nz               = 0;
127318e7b8e4SLois Curfman McInnes   b->maxnz            = nz*bs2;
12742593348eSBarry Smith   b->sorted           = 0;
12752593348eSBarry Smith   b->roworiented      = 1;
12762593348eSBarry Smith   b->nonew            = 0;
12772593348eSBarry Smith   b->diag             = 0;
12782593348eSBarry Smith   b->solve_work       = 0;
1279de6a44a3SBarry Smith   b->mult_work        = 0;
12802593348eSBarry Smith   b->spptr            = 0;
12812593348eSBarry Smith   *A = B;
12822593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
12832593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
12842593348eSBarry Smith   return 0;
12852593348eSBarry Smith }
12862593348eSBarry Smith 
1287b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
12882593348eSBarry Smith {
12892593348eSBarry Smith   Mat         C;
1290b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
12919df24120SSatish Balay   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1292de6a44a3SBarry Smith 
1293de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
12942593348eSBarry Smith 
12952593348eSBarry Smith   *B = 0;
1296b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
12972593348eSBarry Smith   PLogObjectCreate(C);
1298b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
12992593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1300b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1301b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
13022593348eSBarry Smith   C->factor     = A->factor;
13032593348eSBarry Smith   c->row        = 0;
13042593348eSBarry Smith   c->col        = 0;
13052593348eSBarry Smith   C->assembled  = PETSC_TRUE;
13062593348eSBarry Smith 
130744cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
130844cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
130944cd7ae7SLois Curfman McInnes   C->M          = a->m;
131044cd7ae7SLois Curfman McInnes   C->N          = a->n;
131144cd7ae7SLois Curfman McInnes 
1312b6490206SBarry Smith   c->bs         = a->bs;
13139df24120SSatish Balay   c->bs2        = a->bs2;
1314b6490206SBarry Smith   c->mbs        = a->mbs;
1315b6490206SBarry Smith   c->nbs        = a->nbs;
13162593348eSBarry Smith 
1317b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1318b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1319b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
13202593348eSBarry Smith     c->imax[i] = a->imax[i];
13212593348eSBarry Smith     c->ilen[i] = a->ilen[i];
13222593348eSBarry Smith   }
13232593348eSBarry Smith 
13242593348eSBarry Smith   /* allocate the matrix space */
13252593348eSBarry Smith   c->singlemalloc = 1;
13267e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
13272593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
13287e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1329de6a44a3SBarry Smith   c->i  = c->j + nz;
1330b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1331b6490206SBarry Smith   if (mbs > 0) {
1332de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
13332593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
13347e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
13352593348eSBarry Smith     }
13362593348eSBarry Smith   }
13372593348eSBarry Smith 
1338b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
13392593348eSBarry Smith   c->sorted      = a->sorted;
13402593348eSBarry Smith   c->roworiented = a->roworiented;
13412593348eSBarry Smith   c->nonew       = a->nonew;
13422593348eSBarry Smith 
13432593348eSBarry Smith   if (a->diag) {
1344b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1345b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1346b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
13472593348eSBarry Smith       c->diag[i] = a->diag[i];
13482593348eSBarry Smith     }
13492593348eSBarry Smith   }
13502593348eSBarry Smith   else c->diag          = 0;
13512593348eSBarry Smith   c->nz                 = a->nz;
13522593348eSBarry Smith   c->maxnz              = a->maxnz;
13532593348eSBarry Smith   c->solve_work         = 0;
13542593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
13557fc0212eSBarry Smith   c->mult_work          = 0;
13562593348eSBarry Smith   *B = C;
13572593348eSBarry Smith   return 0;
13582593348eSBarry Smith }
13592593348eSBarry Smith 
136019bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
13612593348eSBarry Smith {
1362b6490206SBarry Smith   Mat_SeqBAIJ  *a;
13632593348eSBarry Smith   Mat          B;
1364de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1365b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
136635aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1367a7c10996SSatish Balay   int          *masked, nmask,tmp,bs2,ishift;
1368b6490206SBarry Smith   Scalar       *aa;
136919bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
13702593348eSBarry Smith 
13711a6a6d98SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1372de6a44a3SBarry Smith   bs2  = bs*bs;
1373b6490206SBarry Smith 
13742593348eSBarry Smith   MPI_Comm_size(comm,&size);
1375b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
137690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
137777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1378de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
13792593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
13802593348eSBarry Smith 
1381b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
138235aab85fSBarry Smith 
138335aab85fSBarry Smith   /*
138435aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
138535aab85fSBarry Smith     divisible by the blocksize
138635aab85fSBarry Smith   */
1387b6490206SBarry Smith   mbs        = M/bs;
138835aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
138935aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
139035aab85fSBarry Smith   else                  mbs++;
139135aab85fSBarry Smith   if (extra_rows) {
13921a6a6d98SBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
139335aab85fSBarry Smith   }
1394b6490206SBarry Smith 
13952593348eSBarry Smith   /* read in row lengths */
139635aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
139777c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
139835aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
13992593348eSBarry Smith 
1400b6490206SBarry Smith   /* read in column indices */
140135aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
140277c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
140335aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1404b6490206SBarry Smith 
1405b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1406b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1407b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
140835aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
140935aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
141035aab85fSBarry Smith   masked = mask + mbs;
1411b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1412b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
141335aab85fSBarry Smith     nmask = 0;
1414b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1415b6490206SBarry Smith       kmax = rowlengths[rowcount];
1416b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
141735aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
141835aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1419b6490206SBarry Smith       }
1420b6490206SBarry Smith       rowcount++;
1421b6490206SBarry Smith     }
142235aab85fSBarry Smith     browlengths[i] += nmask;
142335aab85fSBarry Smith     /* zero out the mask elements we set */
142435aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1425b6490206SBarry Smith   }
1426b6490206SBarry Smith 
14272593348eSBarry Smith   /* create our matrix */
142835aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
142935aab85fSBarry Smith          CHKERRQ(ierr);
14302593348eSBarry Smith   B = *A;
1431b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
14322593348eSBarry Smith 
14332593348eSBarry Smith   /* set matrix "i" values */
1434de6a44a3SBarry Smith   a->i[0] = 0;
1435b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1436b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1437b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
14382593348eSBarry Smith   }
1439b6490206SBarry Smith   a->nz         = 0;
1440b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
14412593348eSBarry Smith 
1442b6490206SBarry Smith   /* read in nonzero values */
144335aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
144477c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
144535aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1446b6490206SBarry Smith 
1447b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1448b6490206SBarry Smith   nzcount = 0; jcount = 0;
1449b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1450b6490206SBarry Smith     nzcountb = nzcount;
145135aab85fSBarry Smith     nmask    = 0;
1452b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1453b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1454b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
145535aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
145635aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1457b6490206SBarry Smith       }
1458b6490206SBarry Smith       rowcount++;
1459b6490206SBarry Smith     }
1460de6a44a3SBarry Smith     /* sort the masked values */
146177c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1462de6a44a3SBarry Smith 
1463b6490206SBarry Smith     /* set "j" values into matrix */
1464b6490206SBarry Smith     maskcount = 1;
146535aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
146635aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1467de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1468b6490206SBarry Smith     }
1469b6490206SBarry Smith     /* set "a" values into matrix */
1470de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1471b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1472b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1473b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1474de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1475de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1476de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1477de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1478b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1479b6490206SBarry Smith       }
1480b6490206SBarry Smith     }
148135aab85fSBarry Smith     /* zero out the mask elements we set */
148235aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1483b6490206SBarry Smith   }
148435aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1485b6490206SBarry Smith 
1486b6490206SBarry Smith   PetscFree(rowlengths);
1487b6490206SBarry Smith   PetscFree(browlengths);
1488b6490206SBarry Smith   PetscFree(aa);
1489b6490206SBarry Smith   PetscFree(jj);
1490b6490206SBarry Smith   PetscFree(mask);
1491b6490206SBarry Smith 
1492b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1493de6a44a3SBarry Smith 
1494de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1495de6a44a3SBarry Smith   if (flg) {
149619bcc07fSBarry Smith     Viewer tviewer;
149719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
149890ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
149919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
150019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1501de6a44a3SBarry Smith   }
1502de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1503de6a44a3SBarry Smith   if (flg) {
150419bcc07fSBarry Smith     Viewer tviewer;
150519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
150690ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
150719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
150819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1509de6a44a3SBarry Smith   }
1510de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1511de6a44a3SBarry Smith   if (flg) {
151219bcc07fSBarry Smith     Viewer tviewer;
151319bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
151419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
151519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1516de6a44a3SBarry Smith   }
1517de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1518de6a44a3SBarry Smith   if (flg) {
151919bcc07fSBarry Smith     Viewer tviewer;
152019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
152190ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
152219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
152319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1524de6a44a3SBarry Smith   }
1525de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1526de6a44a3SBarry Smith   if (flg) {
152719bcc07fSBarry Smith     Viewer tviewer;
152819bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
152919bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
153019bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
153119bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1532de6a44a3SBarry Smith   }
15332593348eSBarry Smith   return 0;
15342593348eSBarry Smith }
15352593348eSBarry Smith 
15362593348eSBarry Smith 
15372593348eSBarry Smith 
1538