xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0419e6cdc609cca2c5f755ee738b150a89e0128c)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*0419e6cdSSatish Balay static char vcid[] = "$Id: baij.c,v 1.33 1996/04/09 16:17:21 balay Exp balay $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
112593348eSBarry Smith #include "vec/vecimpl.h"
122593348eSBarry Smith #include "inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
14bb42667fSSatish Balay #define  max(a,b) (a>b ? a:b)
15bcd2baecSBarry Smith extern   int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41bcd2baecSBarry Smith   ishift = a->indexshift;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
44bcd2baecSBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45bcd2baecSBarry Smith     CHKERRQ(ierr);
46bcd2baecSBarry Smith     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
472593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
48bcd2baecSBarry Smith   } else {
49bcd2baecSBarry Smith     if (ishift == oshift) {
50bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51bcd2baecSBarry Smith     }
52bcd2baecSBarry Smith     else if (ishift == -1) {
53bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
54bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
55bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
56bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
59bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60bcd2baecSBarry Smith     } else {
61bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
62bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
63bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
64bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
67bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68bcd2baecSBarry Smith     }
69bcd2baecSBarry Smith   }
702593348eSBarry Smith   return 0;
712593348eSBarry Smith }
722593348eSBarry Smith 
73de6a44a3SBarry Smith /*
74de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
75de6a44a3SBarry Smith */
76de6a44a3SBarry Smith 
77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
78de6a44a3SBarry Smith {
79de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
807fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
81de6a44a3SBarry Smith 
82de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
847fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
85de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86de6a44a3SBarry Smith       if (a->j[j] == i) {
87de6a44a3SBarry Smith         diag[i] = j;
88de6a44a3SBarry Smith         break;
89de6a44a3SBarry Smith       }
90de6a44a3SBarry Smith     }
91de6a44a3SBarry Smith   }
92de6a44a3SBarry Smith   a->diag = diag;
93de6a44a3SBarry Smith   return 0;
94de6a44a3SBarry Smith }
952593348eSBarry Smith 
962593348eSBarry Smith #include "draw.h"
972593348eSBarry Smith #include "pinclude/pviewer.h"
9877c4ece6SBarry Smith #include "sys.h"
992593348eSBarry Smith 
100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1012593348eSBarry Smith {
102b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1037e67e3f9SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104b6490206SBarry Smith   Scalar      *aa;
1052593348eSBarry Smith 
10690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1072593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1082593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1092593348eSBarry Smith   col_lens[1] = a->m;
1102593348eSBarry Smith   col_lens[2] = a->n;
1117e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1122593348eSBarry Smith 
1132593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
114b6490206SBarry Smith   count = 0;
115b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
116b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
117b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118b6490206SBarry Smith     }
1192593348eSBarry Smith   }
12077c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1212593348eSBarry Smith   PetscFree(col_lens);
1222593348eSBarry Smith 
1232593348eSBarry Smith   /* store column indices (zero start index) */
1247e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125b6490206SBarry Smith   count = 0;
126b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
127b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
128b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
130b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1312593348eSBarry Smith         }
1322593348eSBarry Smith       }
133b6490206SBarry Smith     }
134b6490206SBarry Smith   }
1357e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136b6490206SBarry Smith   PetscFree(jj);
1372593348eSBarry Smith 
1382593348eSBarry Smith   /* store nonzero values */
1397e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140b6490206SBarry Smith   count = 0;
141b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
142b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
143b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1457e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
146b6490206SBarry Smith         }
147b6490206SBarry Smith       }
148b6490206SBarry Smith     }
149b6490206SBarry Smith   }
1507e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151b6490206SBarry Smith   PetscFree(aa);
1522593348eSBarry Smith   return 0;
1532593348eSBarry Smith }
1542593348eSBarry Smith 
155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1562593348eSBarry Smith {
157b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1587e67e3f9SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
1592593348eSBarry Smith   FILE        *fd;
1602593348eSBarry Smith   char        *outputname;
1612593348eSBarry Smith 
16290ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1632593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16490ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
1657ddc982cSLois Curfman McInnes   if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) {
1667ddc982cSLois Curfman McInnes     fprintf(fd,"  block size is %d\n",bs);
1672593348eSBarry Smith   }
16890ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
169b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1702593348eSBarry Smith   }
17144cd7ae7SLois Curfman McInnes   else if (format == ASCII_FORMAT_COMMON) {
17244cd7ae7SLois Curfman McInnes     for ( i=0; i<a->mbs; i++ ) {
17344cd7ae7SLois Curfman McInnes       for ( j=0; j<bs; j++ ) {
17444cd7ae7SLois Curfman McInnes         fprintf(fd,"row %d:",i*bs+j);
17544cd7ae7SLois Curfman McInnes         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
17644cd7ae7SLois Curfman McInnes           for ( l=0; l<bs; l++ ) {
17744cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX)
17844cd7ae7SLois Curfman McInnes           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
17944cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
18044cd7ae7SLois Curfman McInnes               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18144cd7ae7SLois Curfman McInnes           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
18244cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18344cd7ae7SLois Curfman McInnes #else
18444cd7ae7SLois Curfman McInnes           if (a->a[bs2*k + l*bs + j] != 0.0)
18544cd7ae7SLois Curfman McInnes             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18644cd7ae7SLois Curfman McInnes #endif
18744cd7ae7SLois Curfman McInnes           }
18844cd7ae7SLois Curfman McInnes         }
18944cd7ae7SLois Curfman McInnes         fprintf(fd,"\n");
19044cd7ae7SLois Curfman McInnes       }
19144cd7ae7SLois Curfman McInnes     }
19244cd7ae7SLois Curfman McInnes   }
1932593348eSBarry Smith   else {
194b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
195b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
196b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
197b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
198b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
2007e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
20188685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
2027e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
20388685aaeSLois Curfman McInnes           }
20488685aaeSLois Curfman McInnes           else {
2057e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
20688685aaeSLois Curfman McInnes           }
20788685aaeSLois Curfman McInnes #else
2087e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
20988685aaeSLois Curfman McInnes #endif
2102593348eSBarry Smith           }
2112593348eSBarry Smith         }
2122593348eSBarry Smith         fprintf(fd,"\n");
2132593348eSBarry Smith       }
2142593348eSBarry Smith     }
215b6490206SBarry Smith   }
2162593348eSBarry Smith   fflush(fd);
2172593348eSBarry Smith   return 0;
2182593348eSBarry Smith }
2192593348eSBarry Smith 
220b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
2212593348eSBarry Smith {
2222593348eSBarry Smith   Mat         A = (Mat) obj;
22319bcc07fSBarry Smith   ViewerType  vtype;
22419bcc07fSBarry Smith   int         ierr;
2252593348eSBarry Smith 
2262593348eSBarry Smith   if (!viewer) {
22719bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2282593348eSBarry Smith   }
22919bcc07fSBarry Smith 
23019bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
23119bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
232b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2332593348eSBarry Smith   }
23419bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
235b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2362593348eSBarry Smith   }
23719bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
238b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2392593348eSBarry Smith   }
24019bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
241b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2422593348eSBarry Smith   }
2432593348eSBarry Smith   return 0;
2442593348eSBarry Smith }
245b6490206SBarry Smith 
246119a934aSSatish Balay #define CHUNKSIZE  10
247cd0e1443SSatish Balay 
248cd0e1443SSatish Balay /* This version has row oriented v  */
249cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
250cd0e1443SSatish Balay {
251cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
252cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
253cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
254cd0e1443SSatish Balay   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
2557e67e3f9SSatish Balay   int          ridx,cidx,bs2=bs*bs;
256cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
257cd0e1443SSatish Balay 
258cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
259cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
260cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
261cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
2627e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
263cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
264cd0e1443SSatish Balay     low = 0;
265cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
266cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
267cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
268cd0e1443SSatish Balay       col = in[l] - shift; bcol = col/bs;
269cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
270cd0e1443SSatish Balay       if (roworiented) {
271cd0e1443SSatish Balay         value = *v++;
272cd0e1443SSatish Balay       }
273cd0e1443SSatish Balay       else {
274cd0e1443SSatish Balay         value = v[k + l*m];
275cd0e1443SSatish Balay       }
276cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
277cd0e1443SSatish Balay       while (high-low > 5) {
278cd0e1443SSatish Balay         t = (low+high)/2;
279cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
280cd0e1443SSatish Balay         else             low  = t;
281cd0e1443SSatish Balay       }
282cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
283cd0e1443SSatish Balay         if (rp[i] > bcol) break;
284cd0e1443SSatish Balay         if (rp[i] == bcol) {
2857e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
286cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
287cd0e1443SSatish Balay           else                  *bap  = value;
288cd0e1443SSatish Balay           goto noinsert;
289cd0e1443SSatish Balay         }
290cd0e1443SSatish Balay       }
291cd0e1443SSatish Balay       if (nonew) goto noinsert;
292cd0e1443SSatish Balay       if (nrow >= rmax) {
293cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
294cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
295cd0e1443SSatish Balay         Scalar *new_a;
296cd0e1443SSatish Balay 
297cd0e1443SSatish Balay         /* malloc new storage space */
2987e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
299cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
3007e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
301cd0e1443SSatish Balay         new_i   = new_j + new_nz;
302cd0e1443SSatish Balay 
303cd0e1443SSatish Balay         /* copy over old data into new slots */
304cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
3057e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
3067e67e3f9SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
307cd0e1443SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
308cd0e1443SSatish Balay         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
309cd0e1443SSatish Balay                                                            len*sizeof(int));
3107e67e3f9SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
3117e67e3f9SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
3127e67e3f9SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
3137e67e3f9SSatish Balay                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
314cd0e1443SSatish Balay         /* free up old matrix storage */
315cd0e1443SSatish Balay         PetscFree(a->a);
316cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
317cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
318cd0e1443SSatish Balay         a->singlemalloc = 1;
319cd0e1443SSatish Balay 
3207e67e3f9SSatish Balay         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
321cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3227e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
323119a934aSSatish Balay         a->maxnz += CHUNKSIZE;
324cd0e1443SSatish Balay         a->reallocs++;
325119a934aSSatish Balay         a->nz++;
326cd0e1443SSatish Balay       }
3277e67e3f9SSatish Balay       N = nrow++ - 1;
328cd0e1443SSatish Balay       /* shift up all the later entries in this row */
329cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
330cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3317e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
332cd0e1443SSatish Balay       }
3337e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
334cd0e1443SSatish Balay       rp[i] = bcol;
3357e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
336cd0e1443SSatish Balay       noinsert:;
337cd0e1443SSatish Balay       low = i;
338cd0e1443SSatish Balay     }
339cd0e1443SSatish Balay     ailen[brow] = nrow;
340cd0e1443SSatish Balay   }
341cd0e1443SSatish Balay   return 0;
342cd0e1443SSatish Balay }
343cd0e1443SSatish Balay 
3440b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3450b824a48SSatish Balay {
3460b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3470b824a48SSatish Balay   *m = a->m; *n = a->n;
3480b824a48SSatish Balay   return 0;
3490b824a48SSatish Balay }
3500b824a48SSatish Balay 
3510b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3520b824a48SSatish Balay {
3530b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3540b824a48SSatish Balay   *m = 0; *n = a->m;
3550b824a48SSatish Balay   return 0;
3560b824a48SSatish Balay }
3570b824a48SSatish Balay 
3589867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3599867e207SSatish Balay {
3609867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3617e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3629867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3639867e207SSatish Balay 
3649867e207SSatish Balay   bs  = a->bs;
3659867e207SSatish Balay   ai  = a->i;
3669867e207SSatish Balay   aj  = a->j;
3679867e207SSatish Balay   aa  = a->a;
3687e67e3f9SSatish Balay   bs2 = bs*bs;
3699867e207SSatish Balay 
3709867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3719867e207SSatish Balay 
3729867e207SSatish Balay   bn  = row/bs;   /* Block number */
3739867e207SSatish Balay   bp  = row % bs; /* Block Position */
3749867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3759867e207SSatish Balay   *nz = bs*M;
3769867e207SSatish Balay 
3779867e207SSatish Balay   if (v) {
3789867e207SSatish Balay     *v = 0;
3799867e207SSatish Balay     if (*nz) {
3809867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3819867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3829867e207SSatish Balay         v_i  = *v + i*bs;
3837e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3847e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3859867e207SSatish Balay       }
3869867e207SSatish Balay     }
3879867e207SSatish Balay   }
3889867e207SSatish Balay 
3899867e207SSatish Balay   if (idx) {
3909867e207SSatish Balay     *idx = 0;
3919867e207SSatish Balay     if (*nz) {
3929867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3939867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3949867e207SSatish Balay         idx_i = *idx + i*bs;
3959867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3969867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3979867e207SSatish Balay       }
3989867e207SSatish Balay     }
3999867e207SSatish Balay   }
4009867e207SSatish Balay   return 0;
4019867e207SSatish Balay }
4029867e207SSatish Balay 
4039867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
4049867e207SSatish Balay {
4059867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
4069867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
4079867e207SSatish Balay   return 0;
4089867e207SSatish Balay }
409b6490206SBarry Smith 
410f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
411f2501298SSatish Balay {
412f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
413f2501298SSatish Balay   Mat         C;
414f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
415f2501298SSatish Balay   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
416f2501298SSatish Balay   Scalar      *array=a->a;
417f2501298SSatish Balay 
418f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
419f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
420f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
421f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
422f2501298SSatish Balay   if (shift) {
423f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
424f2501298SSatish Balay   }
425f2501298SSatish Balay   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
426f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
427f2501298SSatish Balay   PetscFree(col);
428f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
429f2501298SSatish Balay   cols = rows + bs;
430f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
431f2501298SSatish Balay     cols[0] = i*bs;
432f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
433f2501298SSatish Balay     len = ai[i+1] - ai[i];
434f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
435f2501298SSatish Balay       rows[0] = (*aj++)*bs;
436f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
437f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
438f2501298SSatish Balay       array += bs2;
439f2501298SSatish Balay     }
440f2501298SSatish Balay   }
4411073c447SSatish Balay   PetscFree(rows);
442f2501298SSatish Balay   if (shift) {
443f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
444f2501298SSatish Balay   }
445f2501298SSatish Balay 
446f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
447f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
448f2501298SSatish Balay 
449f2501298SSatish Balay   if (B != PETSC_NULL) {
450f2501298SSatish Balay     *B = C;
451f2501298SSatish Balay   } else {
452f2501298SSatish Balay     /* This isn't really an in-place transpose */
453f2501298SSatish Balay     PetscFree(a->a);
454f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
455f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
456f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
457f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
458f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
459f2501298SSatish Balay     PetscFree(a);
460f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
461f2501298SSatish Balay     PetscHeaderDestroy(C);
462f2501298SSatish Balay   }
463f2501298SSatish Balay   return 0;
464f2501298SSatish Balay }
465f2501298SSatish Balay 
466f2501298SSatish Balay 
467584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
468584200bdSSatish Balay {
469584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
470584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
471584200bdSSatish Balay   int        m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift;
472584200bdSSatish Balay   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
473584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
474584200bdSSatish Balay 
475584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
476584200bdSSatish Balay 
477584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
478584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
479584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
480584200bdSSatish Balay     if (fshift) {
481584200bdSSatish Balay       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
482584200bdSSatish Balay       N = ailen[i];
483584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
484584200bdSSatish Balay         ip[j-fshift] = ip[j];
4857e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
486584200bdSSatish Balay       }
487584200bdSSatish Balay     }
488584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
489584200bdSSatish Balay   }
490584200bdSSatish Balay   if (mbs) {
491584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
492584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
493584200bdSSatish Balay   }
494584200bdSSatish Balay   /* reset ilen and imax for each row */
495584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
496584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
497584200bdSSatish Balay   }
498119a934aSSatish Balay   a->nz = ai[mbs] + shift;
499584200bdSSatish Balay 
500584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
501584200bdSSatish Balay   if (fshift && a->diag) {
502584200bdSSatish Balay     PetscFree(a->diag);
503584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
504584200bdSSatish Balay     a->diag = 0;
505584200bdSSatish Balay   }
506584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
507584200bdSSatish Balay            fshift,a->nz,m);
508584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
509584200bdSSatish Balay            a->reallocs);
510584200bdSSatish Balay   return 0;
511584200bdSSatish Balay }
512584200bdSSatish Balay 
513b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
5142593348eSBarry Smith {
515b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
516de6a44a3SBarry Smith   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
5172593348eSBarry Smith   return 0;
5182593348eSBarry Smith }
5192593348eSBarry Smith 
520b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
5212593348eSBarry Smith {
5222593348eSBarry Smith   Mat         A  = (Mat) obj;
523b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5242593348eSBarry Smith 
5252593348eSBarry Smith #if defined(PETSC_LOG)
5262593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5272593348eSBarry Smith #endif
5282593348eSBarry Smith   PetscFree(a->a);
5292593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5302593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5312593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5322593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5332593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
534de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5352593348eSBarry Smith   PetscFree(a);
536f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
537f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5382593348eSBarry Smith   return 0;
5392593348eSBarry Smith }
5402593348eSBarry Smith 
541b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5422593348eSBarry Smith {
543b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5442593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5452593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5462593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5472593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5482593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5492593348eSBarry Smith   else if (op == ROWS_SORTED ||
5502593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5512593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5522593348eSBarry Smith            op == YES_NEW_DIAGONALS)
55394a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5542593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
555b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5562593348eSBarry Smith   else
557b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5582593348eSBarry Smith   return 0;
5592593348eSBarry Smith }
5602593348eSBarry Smith 
5612593348eSBarry Smith 
5622593348eSBarry Smith /* -------------------------------------------------------*/
5632593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5642593348eSBarry Smith /* -------------------------------------------------------*/
565b6490206SBarry Smith #include "pinclude/plapack.h"
566b6490206SBarry Smith 
567bb42667fSSatish Balay static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
5682593348eSBarry Smith {
569b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
570bb42667fSSatish Balay   Scalar          *xg,*y,*zg;
571bb42667fSSatish Balay   register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5;
572b6490206SBarry Smith   register Scalar x1,x2,x3,x4,x5;
573b6490206SBarry Smith   int             mbs=a->mbs,m=a->m,i,*idx,*ii;
574bb42667fSSatish Balay   int             bs=a->bs,j,n,bs2=bs*bs,ierr;
5752593348eSBarry Smith 
576bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
577bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
578b6490206SBarry Smith 
579bb42667fSSatish Balay   if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */
580bb42667fSSatish Balay   else if (zz!=yy){
581bb42667fSSatish Balay     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
582bb42667fSSatish Balay     PetscMemcpy(z,y,m*sizeof(Scalar));
583*0419e6cdSSatish Balay     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
5842593348eSBarry Smith   }
585119a934aSSatish Balay 
586119a934aSSatish Balay   idx   = a->j;
587119a934aSSatish Balay   v     = a->a;
588119a934aSSatish Balay   ii    = a->i;
589119a934aSSatish Balay 
590119a934aSSatish Balay   switch (bs) {
591119a934aSSatish Balay   case 1:
592119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
593119a934aSSatish Balay       n    = ii[1] - ii[0]; ii++;
594119a934aSSatish Balay       sum  = 0.0;
595119a934aSSatish Balay       while (n--) sum += *v++ * x[*idx++];
596119a934aSSatish Balay       z[i] += sum;
597119a934aSSatish Balay     }
598119a934aSSatish Balay     break;
599119a934aSSatish Balay   case 2:
600119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
601119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
602119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0;
603119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
604119a934aSSatish Balay         xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
605119a934aSSatish Balay         sum1 += v[0]*x1 + v[2]*x2;
606119a934aSSatish Balay         sum2 += v[1]*x1 + v[3]*x2;
607119a934aSSatish Balay         v += 4;
608119a934aSSatish Balay       }
609119a934aSSatish Balay       z[0] += sum1; z[1] += sum2;
610119a934aSSatish Balay       z += 2;
611119a934aSSatish Balay     }
612119a934aSSatish Balay     break;
613119a934aSSatish Balay   case 3:
614119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
615119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
616119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
617119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
618119a934aSSatish Balay         xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
619119a934aSSatish Balay         sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
620119a934aSSatish Balay         sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
621119a934aSSatish Balay         sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
622119a934aSSatish Balay         v += 9;
623119a934aSSatish Balay       }
624119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3;
625119a934aSSatish Balay       z += 3;
626119a934aSSatish Balay     }
627119a934aSSatish Balay     break;
628119a934aSSatish Balay   case 4:
629119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
630119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
631119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
632119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
633119a934aSSatish Balay         xb = x + 4*(*idx++);
634119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
635119a934aSSatish Balay         sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
636119a934aSSatish Balay         sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
637119a934aSSatish Balay         sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
638119a934aSSatish Balay         sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
639119a934aSSatish Balay         v += 16;
640119a934aSSatish Balay       }
641119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4;
642119a934aSSatish Balay       z += 4;
643119a934aSSatish Balay     }
644119a934aSSatish Balay     break;
645119a934aSSatish Balay   case 5:
646119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
647119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
648119a934aSSatish Balay       sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
649119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
650119a934aSSatish Balay         xb = x + 5*(*idx++);
651119a934aSSatish Balay         x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
652119a934aSSatish Balay         sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
653119a934aSSatish Balay         sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
654119a934aSSatish Balay         sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
655119a934aSSatish Balay         sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
656119a934aSSatish Balay         sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
657119a934aSSatish Balay         v += 25;
658119a934aSSatish Balay       }
659119a934aSSatish Balay       z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5;
660119a934aSSatish Balay       z += 5;
661119a934aSSatish Balay     }
662119a934aSSatish Balay     break;
663119a934aSSatish Balay     /* block sizes larger then 5 by 5 are handled by BLAS */
664119a934aSSatish Balay   default: {
665119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
666119a934aSSatish Balay       if (!a->mult_work) {
667bb42667fSSatish Balay         k = max(a->m,a->n);
668bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
669119a934aSSatish Balay         CHKPTRQ(a->mult_work);
670119a934aSSatish Balay       }
671119a934aSSatish Balay       work = a->mult_work;
672119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
673119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
674119a934aSSatish Balay         ncols = n*bs;
675119a934aSSatish Balay         workt = work;
676119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
677119a934aSSatish Balay           xb = x + bs*(*idx++);
678119a934aSSatish Balay           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
679119a934aSSatish Balay           workt += bs;
680119a934aSSatish Balay         }
681119a934aSSatish Balay         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
682119a934aSSatish Balay         v += n*bs2;
683119a934aSSatish Balay         z += bs;
684119a934aSSatish Balay       }
685119a934aSSatish Balay     }
686119a934aSSatish Balay   }
687*0419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
688*0419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
689bb42667fSSatish Balay   return 0;
690bb42667fSSatish Balay }
691bb42667fSSatish Balay 
692bb42667fSSatish Balay static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy)
693bb42667fSSatish Balay {
694bb42667fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
695bb42667fSSatish Balay   int         ierr,bs=a->bs,bs2=bs*bs;
696bb42667fSSatish Balay 
697bb42667fSSatish Balay   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
698bb42667fSSatish Balay   PLogFlops(2*bs2*(a->nz)-a->m);
699bb42667fSSatish Balay   return 0;
700bb42667fSSatish Balay }
701bb42667fSSatish Balay 
702bb42667fSSatish Balay static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
703bb42667fSSatish Balay {
704bb42667fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
705bb42667fSSatish Balay   int         ierr,bs=a->bs,bs2=bs*bs;
706bb42667fSSatish Balay 
707bb42667fSSatish Balay   ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
708119a934aSSatish Balay   PLogFlops(2*bs2*(a->nz));
709119a934aSSatish Balay   return 0;
710119a934aSSatish Balay }
711119a934aSSatish Balay 
712bb42667fSSatish Balay static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz)
713119a934aSSatish Balay {
714119a934aSSatish Balay   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
715bb42667fSSatish Balay   Scalar          *xg,*y,*zg,*zb;
716bb42667fSSatish Balay   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
717119a934aSSatish Balay   int             mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval;
718bb42667fSSatish Balay   int             bs=a->bs,j,n,bs2=bs*bs,*ib,ierr;
719119a934aSSatish Balay 
720119a934aSSatish Balay 
721bb42667fSSatish Balay   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
722bb42667fSSatish Balay   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
723bb42667fSSatish Balay 
724bb42667fSSatish Balay   if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */
725bb42667fSSatish Balay   else if (zz!=yy){
726bb42667fSSatish Balay     ierr = VecGetArray(yy,&y); CHKERRQ(ierr);
727bb42667fSSatish Balay     PetscMemcpy(z,y,N*sizeof(Scalar));
728*0419e6cdSSatish Balay     ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr);
729bb42667fSSatish Balay   }
730119a934aSSatish Balay 
731119a934aSSatish Balay   idx   = a->j;
732119a934aSSatish Balay   v     = a->a;
733119a934aSSatish Balay   ii    = a->i;
734119a934aSSatish Balay 
735119a934aSSatish Balay   switch (bs) {
736119a934aSSatish Balay   case 1:
737119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
738119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
739119a934aSSatish Balay       xb = x + i; x1 = xb[0];
740119a934aSSatish Balay       ib = idx + ai[i];
741119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
742119a934aSSatish Balay         z[ib[j]] += *v++ * x1;
743119a934aSSatish Balay       }
744119a934aSSatish Balay     }
745119a934aSSatish Balay     break;
746119a934aSSatish Balay   case 2:
747119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
748119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
749119a934aSSatish Balay       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
750119a934aSSatish Balay       ib = idx + ai[i];
751119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
752119a934aSSatish Balay         rval = ib[j]*2;
753119a934aSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2;
754119a934aSSatish Balay         z[rval++] += v[2]*x1 + v[3]*x2;
755119a934aSSatish Balay         v += 4;
756119a934aSSatish Balay       }
757119a934aSSatish Balay     }
758119a934aSSatish Balay     break;
759119a934aSSatish Balay   case 3:
760119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
761119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
762119a934aSSatish Balay       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
763119a934aSSatish Balay       ib = idx + ai[i];
764119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
765119a934aSSatish Balay         rval = ib[j]*3;
766119a934aSSatish Balay         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
767119a934aSSatish Balay         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
768119a934aSSatish Balay         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
769119a934aSSatish Balay         v += 9;
770119a934aSSatish Balay       }
771119a934aSSatish Balay     }
772119a934aSSatish Balay     break;
773119a934aSSatish Balay   case 4:
774119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
775119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
776119a934aSSatish Balay       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
777119a934aSSatish Balay       ib = idx + ai[i];
778119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
779119a934aSSatish Balay         rval = ib[j]*4;
780119a934aSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
781119a934aSSatish Balay         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
782119a934aSSatish Balay         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
783119a934aSSatish Balay         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
784119a934aSSatish Balay         v += 16;
785119a934aSSatish Balay       }
786119a934aSSatish Balay     }
787119a934aSSatish Balay     break;
788119a934aSSatish Balay   case 5:
789119a934aSSatish Balay     for ( i=0; i<mbs; i++ ) {
790119a934aSSatish Balay       n  = ii[1] - ii[0]; ii++;
791119a934aSSatish Balay       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
792119a934aSSatish Balay       x4 = xb[3];   x5 = xb[4];
793119a934aSSatish Balay       ib = idx + ai[i];
794119a934aSSatish Balay       for ( j=0; j<n; j++ ) {
795119a934aSSatish Balay         rval = ib[j]*5;
796119a934aSSatish Balay         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
797119a934aSSatish Balay         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
798119a934aSSatish Balay         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
799119a934aSSatish Balay         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
800119a934aSSatish Balay         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
801119a934aSSatish Balay         v += 25;
802119a934aSSatish Balay       }
803119a934aSSatish Balay     }
804119a934aSSatish Balay     break;
805119a934aSSatish Balay       /* block sizes larger then 5 by 5 are handled by BLAS */
806119a934aSSatish Balay     default: {
807119a934aSSatish Balay       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
808119a934aSSatish Balay       if (!a->mult_work) {
809bb42667fSSatish Balay         k = max(a->m,a->n);
810bb42667fSSatish Balay         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
811119a934aSSatish Balay         CHKPTRQ(a->mult_work);
812119a934aSSatish Balay       }
813119a934aSSatish Balay       work = a->mult_work;
814119a934aSSatish Balay       for ( i=0; i<mbs; i++ ) {
815119a934aSSatish Balay         n     = ii[1] - ii[0]; ii++;
816119a934aSSatish Balay         ncols = n*bs;
817119a934aSSatish Balay         PetscMemzero(work,ncols*sizeof(Scalar));
818119a934aSSatish Balay         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
819119a934aSSatish Balay         v += n*bs2;
820119a934aSSatish Balay         x += bs;
821119a934aSSatish Balay         workt = work;
822119a934aSSatish Balay         for ( j=0; j<n; j++ ) {
823119a934aSSatish Balay           zb = z + bs*(*idx++);
824119a934aSSatish Balay           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
825119a934aSSatish Balay           workt += bs;
826119a934aSSatish Balay         }
827119a934aSSatish Balay       }
828119a934aSSatish Balay     }
829119a934aSSatish Balay   }
830*0419e6cdSSatish Balay   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
831*0419e6cdSSatish Balay   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
8322593348eSBarry Smith   return 0;
8332593348eSBarry Smith }
8342593348eSBarry Smith 
835bb42667fSSatish Balay static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy)
836bb42667fSSatish Balay {
837bb42667fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
838bb42667fSSatish Balay   int         ierr,bs=a->bs,bs2=bs*bs;
839bb42667fSSatish Balay 
840bb42667fSSatish Balay   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr);
841bb42667fSSatish Balay   PLogFlops(2*bs2*(a->nz)-a->n);
842bb42667fSSatish Balay   return 0;
843bb42667fSSatish Balay }
844bb42667fSSatish Balay 
845bb42667fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
846bb42667fSSatish Balay {
847bb42667fSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
848bb42667fSSatish Balay   int         ierr,bs=a->bs,bs2=bs*bs;
849bb42667fSSatish Balay 
850bb42667fSSatish Balay   ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr);
851bb42667fSSatish Balay   PLogFlops(2*bs2*(a->nz));
852bb42667fSSatish Balay   return 0;
853bb42667fSSatish Balay }
854bb42667fSSatish Balay 
855de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
8562593348eSBarry Smith {
857b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
858bcd2baecSBarry Smith   if (nz)  *nz  = a->bs*a->bs*a->nz;
859bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
860bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
8612593348eSBarry Smith   return 0;
8622593348eSBarry Smith }
8632593348eSBarry Smith 
86491d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
86591d316f6SSatish Balay {
86691d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
86791d316f6SSatish Balay 
86891d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
86991d316f6SSatish Balay 
87091d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
87191d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
87291d316f6SSatish Balay       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
87391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
87491d316f6SSatish Balay   }
87591d316f6SSatish Balay 
87691d316f6SSatish Balay   /* if the a->i are the same */
87791d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
87891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
87991d316f6SSatish Balay   }
88091d316f6SSatish Balay 
88191d316f6SSatish Balay   /* if a->j are the same */
88291d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
88391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
88491d316f6SSatish Balay   }
88591d316f6SSatish Balay 
88691d316f6SSatish Balay   /* if a->a are the same */
88791d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
88891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
88991d316f6SSatish Balay   }
89091d316f6SSatish Balay   *flg = PETSC_TRUE;
89191d316f6SSatish Balay   return 0;
89291d316f6SSatish Balay 
89391d316f6SSatish Balay }
89491d316f6SSatish Balay 
89591d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
89691d316f6SSatish Balay {
89791d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8987e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
89917e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
90017e48fc4SSatish Balay 
90117e48fc4SSatish Balay   bs   = a->bs;
90217e48fc4SSatish Balay   aa   = a->a;
90317e48fc4SSatish Balay   ai   = a->i;
90417e48fc4SSatish Balay   aj   = a->j;
90517e48fc4SSatish Balay   ambs = a->mbs;
9067e67e3f9SSatish Balay   bs2  = bs*bs;
90791d316f6SSatish Balay 
90891d316f6SSatish Balay   VecSet(&zero,v);
90991d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
9109867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
91117e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
91217e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
91317e48fc4SSatish Balay       if (aj[j] == i) {
91417e48fc4SSatish Balay         row  = i*bs;
9157e67e3f9SSatish Balay         aa_j = aa+j*bs2;
9167e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
91791d316f6SSatish Balay         break;
91891d316f6SSatish Balay       }
91991d316f6SSatish Balay     }
92091d316f6SSatish Balay   }
92191d316f6SSatish Balay   return 0;
92291d316f6SSatish Balay }
92391d316f6SSatish Balay 
9249867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
9259867e207SSatish Balay {
9269867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9279867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
9287e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
9299867e207SSatish Balay 
9309867e207SSatish Balay   ai  = a->i;
9319867e207SSatish Balay   aj  = a->j;
9329867e207SSatish Balay   aa  = a->a;
9339867e207SSatish Balay   m   = a->m;
9349867e207SSatish Balay   n   = a->n;
9359867e207SSatish Balay   bs  = a->bs;
9369867e207SSatish Balay   mbs = a->mbs;
9377e67e3f9SSatish Balay   bs2 = bs*bs;
9389867e207SSatish Balay   if (ll) {
9399867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
9409867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
9419867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
9429867e207SSatish Balay       M  = ai[i+1] - ai[i];
9439867e207SSatish Balay       li = l + i*bs;
9447e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
9459867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
9467e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
9479867e207SSatish Balay           (*v++) *= li[k%bs];
9489867e207SSatish Balay         }
9499867e207SSatish Balay       }
9509867e207SSatish Balay     }
9519867e207SSatish Balay   }
9529867e207SSatish Balay 
9539867e207SSatish Balay   if (rr) {
9549867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
9559867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
9569867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
9579867e207SSatish Balay       M  = ai[i+1] - ai[i];
9587e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
9599867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
9609867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
9619867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
9629867e207SSatish Balay           x = ri[k];
9639867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
9649867e207SSatish Balay         }
9659867e207SSatish Balay       }
9669867e207SSatish Balay     }
9679867e207SSatish Balay   }
9689867e207SSatish Balay   return 0;
9699867e207SSatish Balay }
9709867e207SSatish Balay 
9719867e207SSatish Balay 
972b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
973b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
974b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
9757fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
9767fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
9777fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
9787fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
9797fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
9807fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
9812593348eSBarry Smith 
982b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
9832593348eSBarry Smith {
984b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
9852593348eSBarry Smith   Scalar      *v = a->a;
9862593348eSBarry Smith   double      sum = 0.0;
987*0419e6cdSSatish Balay   int         i,bs=a->bs,nz=a->nz,bs2=bs*bs;
9882593348eSBarry Smith 
9892593348eSBarry Smith   if (type == NORM_FROBENIUS) {
990*0419e6cdSSatish Balay     for (i=0; i< bs2*nz; i++ ) {
9912593348eSBarry Smith #if defined(PETSC_COMPLEX)
9922593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
9932593348eSBarry Smith #else
9942593348eSBarry Smith       sum += (*v)*(*v); v++;
9952593348eSBarry Smith #endif
9962593348eSBarry Smith     }
9972593348eSBarry Smith     *norm = sqrt(sum);
9982593348eSBarry Smith   }
9992593348eSBarry Smith   else {
1000b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
10012593348eSBarry Smith   }
10022593348eSBarry Smith   return 0;
10032593348eSBarry Smith }
10042593348eSBarry Smith 
10052593348eSBarry Smith /*
10062593348eSBarry Smith      note: This can only work for identity for row and col. It would
10072593348eSBarry Smith    be good to check this and otherwise generate an error.
10082593348eSBarry Smith */
1009b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
10102593348eSBarry Smith {
1011b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
10122593348eSBarry Smith   Mat         outA;
1013de6a44a3SBarry Smith   int         ierr;
10142593348eSBarry Smith 
1015b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
10162593348eSBarry Smith 
10172593348eSBarry Smith   outA          = inA;
10182593348eSBarry Smith   inA->factor   = FACTOR_LU;
10192593348eSBarry Smith   a->row        = row;
10202593348eSBarry Smith   a->col        = col;
10212593348eSBarry Smith 
1022de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
10232593348eSBarry Smith 
10242593348eSBarry Smith   if (!a->diag) {
1025b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
10262593348eSBarry Smith   }
10277fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
10282593348eSBarry Smith   return 0;
10292593348eSBarry Smith }
10302593348eSBarry Smith 
1031b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
10322593348eSBarry Smith {
1033b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1034b6490206SBarry Smith   int         one = 1, totalnz = a->bs*a->bs*a->nz;
1035b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
1036b6490206SBarry Smith   PLogFlops(totalnz);
10372593348eSBarry Smith   return 0;
10382593348eSBarry Smith }
10392593348eSBarry Smith 
10407e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
10417e67e3f9SSatish Balay {
10427e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
10437e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
10447e67e3f9SSatish Balay   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
10457e67e3f9SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
10467e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
10477e67e3f9SSatish Balay 
10487e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
10497e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
10507e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
10517e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
10527e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
10537e67e3f9SSatish Balay     nrow = ailen[brow];
10547e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
10557e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
10567e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
10577e67e3f9SSatish Balay       col  = in[l] - shift;
10587e67e3f9SSatish Balay       bcol = col/bs;
10597e67e3f9SSatish Balay       cidx = col%bs;
10607e67e3f9SSatish Balay       ridx = row%bs;
10617e67e3f9SSatish Balay       high = nrow;
10627e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
10637e67e3f9SSatish Balay       while (high-low > 5) {
10647e67e3f9SSatish Balay         t = (low+high)/2;
10657e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
10667e67e3f9SSatish Balay         else             low  = t;
10677e67e3f9SSatish Balay       }
10687e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
10697e67e3f9SSatish Balay         if (rp[i] > bcol) break;
10707e67e3f9SSatish Balay         if (rp[i] == bcol) {
10717e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
10727e67e3f9SSatish Balay           goto finished;
10737e67e3f9SSatish Balay         }
10747e67e3f9SSatish Balay       }
10757e67e3f9SSatish Balay       *v++ = zero;
10767e67e3f9SSatish Balay       finished:;
10777e67e3f9SSatish Balay     }
10787e67e3f9SSatish Balay   }
10797e67e3f9SSatish Balay   return 0;
10807e67e3f9SSatish Balay }
10817e67e3f9SSatish Balay 
1082b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A)
10832593348eSBarry Smith {
10842593348eSBarry Smith   static int called = 0;
10852593348eSBarry Smith 
10862593348eSBarry Smith   if (called) return 0; else called = 1;
10872593348eSBarry Smith   return 0;
10882593348eSBarry Smith }
10892593348eSBarry Smith /* -------------------------------------------------------------------*/
1090cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
10919867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1092119a934aSSatish Balay        MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ,
1093bb42667fSSatish Balay        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1094de6a44a3SBarry Smith        MatSolve_SeqBAIJ,0,
1095b6490206SBarry Smith        0,0,
1096de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
1097b6490206SBarry Smith        0,
1098f2501298SSatish Balay        MatTranspose_SeqBAIJ,
109917e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
11009867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1101584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
1102b6490206SBarry Smith        0,
1103b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
1104b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
11057fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1106b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1107de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
1108b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1109b6490206SBarry Smith        0,0,
1110b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
1111b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
1112b6490206SBarry Smith        0,0,
11137e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
1114b6490206SBarry Smith        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1115b6490206SBarry Smith        0};
11162593348eSBarry Smith 
11172593348eSBarry Smith /*@C
111844cd7ae7SLois Curfman McInnes    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
111944cd7ae7SLois Curfman McInnes    compressed row) format.  For good matrix assembly performance the
112044cd7ae7SLois Curfman McInnes    user should preallocate the matrix storage by setting the parameter nz
11212593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
11222593348eSBarry Smith    increased by more than a factor of 50.
11232593348eSBarry Smith 
11242593348eSBarry Smith    Input Parameters:
11252593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
1126b6490206SBarry Smith .  bs - size of block
11272593348eSBarry Smith .  m - number of rows
11282593348eSBarry Smith .  n - number of columns
1129b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
1130b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
1131b6490206SBarry Smith          (possibly different for each row)
11322593348eSBarry Smith 
11332593348eSBarry Smith    Output Parameter:
11342593348eSBarry Smith .  A - the matrix
11352593348eSBarry Smith 
11362593348eSBarry Smith    Notes:
113744cd7ae7SLois Curfman McInnes    The block AIJ format is fully compatible with standard Fortran 77
11382593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
113944cd7ae7SLois Curfman McInnes    either one (as in Fortran) or zero.  See the users' manual for details.
11402593348eSBarry Smith 
11412593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
11422593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
11432593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
11442593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
11452593348eSBarry Smith 
114644cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
11472593348eSBarry Smith @*/
1148b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
11492593348eSBarry Smith {
11502593348eSBarry Smith   Mat         B;
1151b6490206SBarry Smith   Mat_SeqBAIJ *b;
1152f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
1153b6490206SBarry Smith 
1154f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
1155f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
11562593348eSBarry Smith 
11572593348eSBarry Smith   *A = 0;
1158b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
11592593348eSBarry Smith   PLogObjectCreate(B);
1160b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
116144cd7ae7SLois Curfman McInnes   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
11622593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
11637fc0212eSBarry Smith   switch (bs) {
11647fc0212eSBarry Smith     case 1:
11657fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
11667fc0212eSBarry Smith       break;
11674eeb42bcSBarry Smith     case 2:
11684eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
11696d84be18SBarry Smith       break;
1170f327dd97SBarry Smith     case 3:
1171f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
11724eeb42bcSBarry Smith       break;
11736d84be18SBarry Smith     case 4:
11746d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
11756d84be18SBarry Smith       break;
11766d84be18SBarry Smith     case 5:
11776d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
11786d84be18SBarry Smith       break;
11797fc0212eSBarry Smith   }
1180b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
1181b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
11822593348eSBarry Smith   B->factor           = 0;
11832593348eSBarry Smith   B->lupivotthreshold = 1.0;
11842593348eSBarry Smith   b->row              = 0;
11852593348eSBarry Smith   b->col              = 0;
11862593348eSBarry Smith   b->reallocs         = 0;
11872593348eSBarry Smith 
118844cd7ae7SLois Curfman McInnes   b->m       = m; B->m = m; B->M = m;
118944cd7ae7SLois Curfman McInnes   b->n       = n; B->n = n; B->N = n;
1190b6490206SBarry Smith   b->mbs     = mbs;
1191f2501298SSatish Balay   b->nbs     = nbs;
1192b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
11932593348eSBarry Smith   if (nnz == PETSC_NULL) {
1194119a934aSSatish Balay     if (nz == PETSC_DEFAULT) nz = 5;
11952593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1196b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1197b6490206SBarry Smith     nz = nz*mbs;
11982593348eSBarry Smith   }
11992593348eSBarry Smith   else {
12002593348eSBarry Smith     nz = 0;
1201b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
12022593348eSBarry Smith   }
12032593348eSBarry Smith 
12042593348eSBarry Smith   /* allocate the matrix space */
12057e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
12062593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
12077e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
12087e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
12092593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
12102593348eSBarry Smith   b->i  = b->j + nz;
12112593348eSBarry Smith   b->singlemalloc = 1;
12122593348eSBarry Smith 
1213de6a44a3SBarry Smith   b->i[0] = 0;
1214b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
12152593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
12162593348eSBarry Smith   }
12172593348eSBarry Smith 
1218b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1219b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1220b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1221b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
12222593348eSBarry Smith 
1223b6490206SBarry Smith   b->bs               = bs;
1224b6490206SBarry Smith   b->mbs              = mbs;
12252593348eSBarry Smith   b->nz               = 0;
12262593348eSBarry Smith   b->maxnz            = nz;
12272593348eSBarry Smith   b->sorted           = 0;
12282593348eSBarry Smith   b->roworiented      = 1;
12292593348eSBarry Smith   b->nonew            = 0;
12302593348eSBarry Smith   b->diag             = 0;
12312593348eSBarry Smith   b->solve_work       = 0;
1232de6a44a3SBarry Smith   b->mult_work        = 0;
12332593348eSBarry Smith   b->spptr            = 0;
12341073c447SSatish Balay   b->indexshift       = 0;
12352593348eSBarry Smith   *A = B;
12362593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
12372593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
12382593348eSBarry Smith   return 0;
12392593348eSBarry Smith }
12402593348eSBarry Smith 
1241b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
12422593348eSBarry Smith {
12432593348eSBarry Smith   Mat         C;
1244b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
12457e67e3f9SSatish Balay   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1246de6a44a3SBarry Smith 
1247de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
12482593348eSBarry Smith 
12492593348eSBarry Smith   *B = 0;
1250b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
12512593348eSBarry Smith   PLogObjectCreate(C);
1252b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
12532593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1254b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1255b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
12562593348eSBarry Smith   C->factor     = A->factor;
12572593348eSBarry Smith   c->row        = 0;
12582593348eSBarry Smith   c->col        = 0;
12592593348eSBarry Smith   C->assembled  = PETSC_TRUE;
12602593348eSBarry Smith 
126144cd7ae7SLois Curfman McInnes   c->m = C->m   = a->m;
126244cd7ae7SLois Curfman McInnes   c->n = C->n   = a->n;
126344cd7ae7SLois Curfman McInnes   C->M          = a->m;
126444cd7ae7SLois Curfman McInnes   C->N          = a->n;
126544cd7ae7SLois Curfman McInnes 
1266b6490206SBarry Smith   c->bs         = a->bs;
1267b6490206SBarry Smith   c->mbs        = a->mbs;
1268b6490206SBarry Smith   c->nbs        = a->nbs;
12691073c447SSatish Balay   c->indexshift = 0;
12702593348eSBarry Smith 
1271b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1272b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1273b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
12742593348eSBarry Smith     c->imax[i] = a->imax[i];
12752593348eSBarry Smith     c->ilen[i] = a->ilen[i];
12762593348eSBarry Smith   }
12772593348eSBarry Smith 
12782593348eSBarry Smith   /* allocate the matrix space */
12792593348eSBarry Smith   c->singlemalloc = 1;
12807e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
12812593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
12827e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1283de6a44a3SBarry Smith   c->i  = c->j + nz;
1284b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1285b6490206SBarry Smith   if (mbs > 0) {
1286de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
12872593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
12887e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
12892593348eSBarry Smith     }
12902593348eSBarry Smith   }
12912593348eSBarry Smith 
1292b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
12932593348eSBarry Smith   c->sorted      = a->sorted;
12942593348eSBarry Smith   c->roworiented = a->roworiented;
12952593348eSBarry Smith   c->nonew       = a->nonew;
12962593348eSBarry Smith 
12972593348eSBarry Smith   if (a->diag) {
1298b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1299b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1300b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
13012593348eSBarry Smith       c->diag[i] = a->diag[i];
13022593348eSBarry Smith     }
13032593348eSBarry Smith   }
13042593348eSBarry Smith   else c->diag          = 0;
13052593348eSBarry Smith   c->nz                 = a->nz;
13062593348eSBarry Smith   c->maxnz              = a->maxnz;
13072593348eSBarry Smith   c->solve_work         = 0;
13082593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
13097fc0212eSBarry Smith   c->mult_work          = 0;
13102593348eSBarry Smith   *B = C;
13112593348eSBarry Smith   return 0;
13122593348eSBarry Smith }
13132593348eSBarry Smith 
131419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
13152593348eSBarry Smith {
1316b6490206SBarry Smith   Mat_SeqBAIJ  *a;
13172593348eSBarry Smith   Mat          B;
1318de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1319b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
132035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1321de6a44a3SBarry Smith   int          *masked, nmask,tmp,ishift,bs2;
1322b6490206SBarry Smith   Scalar       *aa;
132319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
13242593348eSBarry Smith 
132535aab85fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1326de6a44a3SBarry Smith   bs2  = bs*bs;
1327b6490206SBarry Smith 
13282593348eSBarry Smith   MPI_Comm_size(comm,&size);
1329b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
133090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
133177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1332de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
13332593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
13342593348eSBarry Smith 
1335b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
133635aab85fSBarry Smith 
133735aab85fSBarry Smith   /*
133835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
133935aab85fSBarry Smith     divisible by the blocksize
134035aab85fSBarry Smith   */
1341b6490206SBarry Smith   mbs        = M/bs;
134235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
134335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
134435aab85fSBarry Smith   else                  mbs++;
134535aab85fSBarry Smith   if (extra_rows) {
134635aab85fSBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
134735aab85fSBarry Smith   }
1348b6490206SBarry Smith 
13492593348eSBarry Smith   /* read in row lengths */
135035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
135177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
135235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
13532593348eSBarry Smith 
1354b6490206SBarry Smith   /* read in column indices */
135535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
135677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
135735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1358b6490206SBarry Smith 
1359b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1360b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1361b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
136235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
136335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
136435aab85fSBarry Smith   masked = mask + mbs;
1365b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1366b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
136735aab85fSBarry Smith     nmask = 0;
1368b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1369b6490206SBarry Smith       kmax = rowlengths[rowcount];
1370b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
137135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
137235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1373b6490206SBarry Smith       }
1374b6490206SBarry Smith       rowcount++;
1375b6490206SBarry Smith     }
137635aab85fSBarry Smith     browlengths[i] += nmask;
137735aab85fSBarry Smith     /* zero out the mask elements we set */
137835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1379b6490206SBarry Smith   }
1380b6490206SBarry Smith 
13812593348eSBarry Smith   /* create our matrix */
138235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
138335aab85fSBarry Smith          CHKERRQ(ierr);
13842593348eSBarry Smith   B = *A;
1385b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
13862593348eSBarry Smith 
13872593348eSBarry Smith   /* set matrix "i" values */
1388de6a44a3SBarry Smith   a->i[0] = 0;
1389b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1390b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1391b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
13922593348eSBarry Smith   }
13939867e207SSatish Balay   a->indexshift = 0;
1394b6490206SBarry Smith   a->nz         = 0;
1395b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
13962593348eSBarry Smith 
1397b6490206SBarry Smith   /* read in nonzero values */
139835aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
139977c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
140035aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1401b6490206SBarry Smith 
1402b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1403b6490206SBarry Smith   nzcount = 0; jcount = 0;
1404b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1405b6490206SBarry Smith     nzcountb = nzcount;
140635aab85fSBarry Smith     nmask    = 0;
1407b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1408b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1409b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
141035aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
141135aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1412b6490206SBarry Smith       }
1413b6490206SBarry Smith       rowcount++;
1414b6490206SBarry Smith     }
1415de6a44a3SBarry Smith     /* sort the masked values */
141677c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1417de6a44a3SBarry Smith 
1418b6490206SBarry Smith     /* set "j" values into matrix */
1419b6490206SBarry Smith     maskcount = 1;
142035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
142135aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1422de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1423b6490206SBarry Smith     }
1424b6490206SBarry Smith     /* set "a" values into matrix */
1425de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1426b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1427b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1428b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1429de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1430de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1431de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1432de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1433b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1434b6490206SBarry Smith       }
1435b6490206SBarry Smith     }
143635aab85fSBarry Smith     /* zero out the mask elements we set */
143735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1438b6490206SBarry Smith   }
143935aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1440b6490206SBarry Smith 
1441b6490206SBarry Smith   PetscFree(rowlengths);
1442b6490206SBarry Smith   PetscFree(browlengths);
1443b6490206SBarry Smith   PetscFree(aa);
1444b6490206SBarry Smith   PetscFree(jj);
1445b6490206SBarry Smith   PetscFree(mask);
1446b6490206SBarry Smith 
1447b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1448de6a44a3SBarry Smith 
1449de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1450de6a44a3SBarry Smith   if (flg) {
145119bcc07fSBarry Smith     Viewer tviewer;
145219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
145390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
145419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
145519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1456de6a44a3SBarry Smith   }
1457de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1458de6a44a3SBarry Smith   if (flg) {
145919bcc07fSBarry Smith     Viewer tviewer;
146019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
146190ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
146219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
146319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1464de6a44a3SBarry Smith   }
1465de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1466de6a44a3SBarry Smith   if (flg) {
146719bcc07fSBarry Smith     Viewer tviewer;
146819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
146919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
147019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1471de6a44a3SBarry Smith   }
1472de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1473de6a44a3SBarry Smith   if (flg) {
147419bcc07fSBarry Smith     Viewer tviewer;
147519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
147690ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
147719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
147819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1479de6a44a3SBarry Smith   }
1480de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1481de6a44a3SBarry Smith   if (flg) {
148219bcc07fSBarry Smith     Viewer tviewer;
148319bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
148419bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
148519bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
148619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1487de6a44a3SBarry Smith   }
14882593348eSBarry Smith   return 0;
14892593348eSBarry Smith }
14902593348eSBarry Smith 
14912593348eSBarry Smith 
14922593348eSBarry Smith 
1493