xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f2501298210485f1203ef904b88cfc58e030e56a)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*f2501298SSatish Balay static char vcid[] = "$Id: baij.c,v 1.27 1996/04/05 16:20:05 balay Exp balay $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
112593348eSBarry Smith #include "vec/vecimpl.h"
122593348eSBarry Smith #include "inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
142593348eSBarry Smith 
15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41bcd2baecSBarry Smith   ishift = a->indexshift;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
44bcd2baecSBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45bcd2baecSBarry Smith     CHKERRQ(ierr);
46bcd2baecSBarry Smith     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
472593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
48bcd2baecSBarry Smith   } else {
49bcd2baecSBarry Smith     if (ishift == oshift) {
50bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51bcd2baecSBarry Smith     }
52bcd2baecSBarry Smith     else if (ishift == -1) {
53bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
54bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
55bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
56bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
59bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60bcd2baecSBarry Smith     } else {
61bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
62bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
63bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
64bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
67bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68bcd2baecSBarry Smith     }
69bcd2baecSBarry Smith   }
702593348eSBarry Smith   return 0;
712593348eSBarry Smith }
722593348eSBarry Smith 
73de6a44a3SBarry Smith /*
74de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
75de6a44a3SBarry Smith */
76de6a44a3SBarry Smith 
77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
78de6a44a3SBarry Smith {
79de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
807fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
81de6a44a3SBarry Smith 
82de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
847fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
85de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86de6a44a3SBarry Smith       if (a->j[j] == i) {
87de6a44a3SBarry Smith         diag[i] = j;
88de6a44a3SBarry Smith         break;
89de6a44a3SBarry Smith       }
90de6a44a3SBarry Smith     }
91de6a44a3SBarry Smith   }
92de6a44a3SBarry Smith   a->diag = diag;
93de6a44a3SBarry Smith   return 0;
94de6a44a3SBarry Smith }
952593348eSBarry Smith 
962593348eSBarry Smith #include "draw.h"
972593348eSBarry Smith #include "pinclude/pviewer.h"
9877c4ece6SBarry Smith #include "sys.h"
992593348eSBarry Smith 
100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1012593348eSBarry Smith {
102b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1037e67e3f9SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104b6490206SBarry Smith   Scalar      *aa;
1052593348eSBarry Smith 
10690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1072593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1082593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1092593348eSBarry Smith   col_lens[1] = a->m;
1102593348eSBarry Smith   col_lens[2] = a->n;
1117e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1122593348eSBarry Smith 
1132593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
114b6490206SBarry Smith   count = 0;
115b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
116b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
117b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118b6490206SBarry Smith     }
1192593348eSBarry Smith   }
12077c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1212593348eSBarry Smith   PetscFree(col_lens);
1222593348eSBarry Smith 
1232593348eSBarry Smith   /* store column indices (zero start index) */
1247e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125b6490206SBarry Smith   count = 0;
126b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
127b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
128b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
130b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1312593348eSBarry Smith         }
1322593348eSBarry Smith       }
133b6490206SBarry Smith     }
134b6490206SBarry Smith   }
1357e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136b6490206SBarry Smith   PetscFree(jj);
1372593348eSBarry Smith 
1382593348eSBarry Smith   /* store nonzero values */
1397e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140b6490206SBarry Smith   count = 0;
141b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
142b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
143b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1457e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
146b6490206SBarry Smith         }
147b6490206SBarry Smith       }
148b6490206SBarry Smith     }
149b6490206SBarry Smith   }
1507e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151b6490206SBarry Smith   PetscFree(aa);
1522593348eSBarry Smith   return 0;
1532593348eSBarry Smith }
1542593348eSBarry Smith 
155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1562593348eSBarry Smith {
157b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1587e67e3f9SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
1592593348eSBarry Smith   FILE        *fd;
1602593348eSBarry Smith   char        *outputname;
1612593348eSBarry Smith 
16290ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1632593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16490ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
16590ace30eSBarry Smith   if (format == ASCII_FORMAT_INFO) {
1662593348eSBarry Smith     /* no need to print additional information */ ;
1672593348eSBarry Smith   }
16890ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
169b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1702593348eSBarry Smith   }
1712593348eSBarry Smith   else {
172b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
173b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
174b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
175b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
17788685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1787e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
17988685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1807e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18188685aaeSLois Curfman McInnes           }
18288685aaeSLois Curfman McInnes           else {
1837e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18488685aaeSLois Curfman McInnes           }
18588685aaeSLois Curfman McInnes #else
1867e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18788685aaeSLois Curfman McInnes #endif
1882593348eSBarry Smith           }
1892593348eSBarry Smith         }
1902593348eSBarry Smith         fprintf(fd,"\n");
1912593348eSBarry Smith       }
1922593348eSBarry Smith     }
193b6490206SBarry Smith   }
1942593348eSBarry Smith   fflush(fd);
1952593348eSBarry Smith   return 0;
1962593348eSBarry Smith }
1972593348eSBarry Smith 
198b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
1992593348eSBarry Smith {
2002593348eSBarry Smith   Mat         A = (Mat) obj;
20119bcc07fSBarry Smith   ViewerType  vtype;
20219bcc07fSBarry Smith   int         ierr;
2032593348eSBarry Smith 
2042593348eSBarry Smith   if (!viewer) {
20519bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2062593348eSBarry Smith   }
20719bcc07fSBarry Smith 
20819bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
20919bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
210b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2112593348eSBarry Smith   }
21219bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2142593348eSBarry Smith   }
21519bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
216b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2172593348eSBarry Smith   }
21819bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
219b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2202593348eSBarry Smith   }
2212593348eSBarry Smith   return 0;
2222593348eSBarry Smith }
223b6490206SBarry Smith 
2247e67e3f9SSatish Balay #define CHUNKSIZE  1
225cd0e1443SSatish Balay 
226cd0e1443SSatish Balay /* This version has row oriented v  */
227cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228cd0e1443SSatish Balay {
229cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232cd0e1443SSatish Balay   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
2337e67e3f9SSatish Balay   int          ridx,cidx,bs2=bs*bs;
234cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
235cd0e1443SSatish Balay 
236cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
237cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
238cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
2407e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
241cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
242cd0e1443SSatish Balay     low = 0;
243cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
244cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246cd0e1443SSatish Balay       col = in[l] - shift; bcol = col/bs;
247cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
248cd0e1443SSatish Balay       if (roworiented) {
249cd0e1443SSatish Balay         value = *v++;
250cd0e1443SSatish Balay       }
251cd0e1443SSatish Balay       else {
252cd0e1443SSatish Balay         value = v[k + l*m];
253cd0e1443SSatish Balay       }
254cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
255cd0e1443SSatish Balay       while (high-low > 5) {
256cd0e1443SSatish Balay         t = (low+high)/2;
257cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
258cd0e1443SSatish Balay         else             low  = t;
259cd0e1443SSatish Balay       }
260cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
261cd0e1443SSatish Balay         if (rp[i] > bcol) break;
262cd0e1443SSatish Balay         if (rp[i] == bcol) {
2637e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
264cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
265cd0e1443SSatish Balay           else                  *bap  = value;
266cd0e1443SSatish Balay           goto noinsert;
267cd0e1443SSatish Balay         }
268cd0e1443SSatish Balay       }
269cd0e1443SSatish Balay       if (nonew) goto noinsert;
270cd0e1443SSatish Balay       if (nrow >= rmax) {
271cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
272cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273cd0e1443SSatish Balay         Scalar *new_a;
274cd0e1443SSatish Balay 
275cd0e1443SSatish Balay         /* malloc new storage space */
2767e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
2787e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
279cd0e1443SSatish Balay         new_i   = new_j + new_nz;
280cd0e1443SSatish Balay 
281cd0e1443SSatish Balay         /* copy over old data into new slots */
282cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
2837e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
2847e67e3f9SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
285cd0e1443SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286cd0e1443SSatish Balay         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287cd0e1443SSatish Balay                                                            len*sizeof(int));
2887e67e3f9SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
2897e67e3f9SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
2907e67e3f9SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
2917e67e3f9SSatish Balay                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
292cd0e1443SSatish Balay         /* free up old matrix storage */
293cd0e1443SSatish Balay         PetscFree(a->a);
294cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296cd0e1443SSatish Balay         a->singlemalloc = 1;
297cd0e1443SSatish Balay 
2987e67e3f9SSatish Balay         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
299cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3007e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
3017e67e3f9SSatish Balay         a->maxnz += bs2*CHUNKSIZE;
302cd0e1443SSatish Balay         a->reallocs++;
3037e67e3f9SSatish Balay         a->nz+=bs2;
304cd0e1443SSatish Balay       }
3057e67e3f9SSatish Balay       N = nrow++ - 1;
306cd0e1443SSatish Balay       /* shift up all the later entries in this row */
307cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
308cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3097e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
310cd0e1443SSatish Balay       }
3117e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
312cd0e1443SSatish Balay       rp[i] = bcol;
3137e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
314cd0e1443SSatish Balay       noinsert:;
315cd0e1443SSatish Balay       low = i;
316cd0e1443SSatish Balay     }
317cd0e1443SSatish Balay     ailen[brow] = nrow;
318cd0e1443SSatish Balay   }
319cd0e1443SSatish Balay   return 0;
320cd0e1443SSatish Balay }
321cd0e1443SSatish Balay 
3220b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3230b824a48SSatish Balay {
3240b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3250b824a48SSatish Balay   *m = a->m; *n = a->n;
3260b824a48SSatish Balay   return 0;
3270b824a48SSatish Balay }
3280b824a48SSatish Balay 
3290b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3300b824a48SSatish Balay {
3310b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3320b824a48SSatish Balay   *m = 0; *n = a->m;
3330b824a48SSatish Balay   return 0;
3340b824a48SSatish Balay }
3350b824a48SSatish Balay 
3369867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3379867e207SSatish Balay {
3389867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3397e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3409867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3419867e207SSatish Balay 
3429867e207SSatish Balay   bs  = a->bs;
3439867e207SSatish Balay   ai  = a->i;
3449867e207SSatish Balay   aj  = a->j;
3459867e207SSatish Balay   aa  = a->a;
3467e67e3f9SSatish Balay   bs2 = bs*bs;
3479867e207SSatish Balay 
3489867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3499867e207SSatish Balay 
3509867e207SSatish Balay   bn  = row/bs;   /* Block number */
3519867e207SSatish Balay   bp  = row % bs; /* Block Position */
3529867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3539867e207SSatish Balay   *nz = bs*M;
3549867e207SSatish Balay 
3559867e207SSatish Balay   if (v) {
3569867e207SSatish Balay     *v = 0;
3579867e207SSatish Balay     if (*nz) {
3589867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3599867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3609867e207SSatish Balay         v_i  = *v + i*bs;
3617e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3627e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3639867e207SSatish Balay       }
3649867e207SSatish Balay     }
3659867e207SSatish Balay   }
3669867e207SSatish Balay 
3679867e207SSatish Balay   if (idx) {
3689867e207SSatish Balay     *idx = 0;
3699867e207SSatish Balay     if (*nz) {
3709867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3719867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3729867e207SSatish Balay         idx_i = *idx + i*bs;
3739867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3749867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3759867e207SSatish Balay       }
3769867e207SSatish Balay     }
3779867e207SSatish Balay   }
3789867e207SSatish Balay   return 0;
3799867e207SSatish Balay }
3809867e207SSatish Balay 
3819867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3829867e207SSatish Balay {
3839867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
3849867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
3859867e207SSatish Balay   return 0;
3869867e207SSatish Balay }
387b6490206SBarry Smith 
388*f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
389*f2501298SSatish Balay {
390*f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
391*f2501298SSatish Balay   Mat         C;
392*f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
393*f2501298SSatish Balay   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
394*f2501298SSatish Balay   Scalar      *array=a->a;
395*f2501298SSatish Balay 
396*f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
397*f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
398*f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
399*f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
400*f2501298SSatish Balay   if (shift) {
401*f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
402*f2501298SSatish Balay   }
403*f2501298SSatish Balay   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
404*f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
405*f2501298SSatish Balay   PetscFree(col);
406*f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
407*f2501298SSatish Balay   cols = rows + bs;
408*f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
409*f2501298SSatish Balay     cols[0] = i*bs;
410*f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
411*f2501298SSatish Balay     len = ai[i+1] - ai[i];
412*f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
413*f2501298SSatish Balay       rows[0] = (*aj++)*bs;
414*f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
415*f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
416*f2501298SSatish Balay       array += bs2;
417*f2501298SSatish Balay     }
418*f2501298SSatish Balay   }
419*f2501298SSatish Balay   if (shift) {
420*f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
421*f2501298SSatish Balay   }
422*f2501298SSatish Balay 
423*f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
424*f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
425*f2501298SSatish Balay 
426*f2501298SSatish Balay   if (B != PETSC_NULL) {
427*f2501298SSatish Balay     *B = C;
428*f2501298SSatish Balay   } else {
429*f2501298SSatish Balay     /* This isn't really an in-place transpose */
430*f2501298SSatish Balay     PetscFree(a->a);
431*f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
432*f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
433*f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
434*f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
435*f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
436*f2501298SSatish Balay     PetscFree(a);
437*f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
438*f2501298SSatish Balay     PetscHeaderDestroy(C);
439*f2501298SSatish Balay   }
440*f2501298SSatish Balay   return 0;
441*f2501298SSatish Balay }
442*f2501298SSatish Balay 
443*f2501298SSatish Balay 
444584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
445584200bdSSatish Balay {
446584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
447584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
448584200bdSSatish Balay   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
449584200bdSSatish Balay   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
450584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
451584200bdSSatish Balay 
452584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
453584200bdSSatish Balay 
454584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
455584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
456584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
457584200bdSSatish Balay     if (fshift) {
458584200bdSSatish Balay       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
459584200bdSSatish Balay       N = ailen[i];
460584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
461584200bdSSatish Balay         ip[j-fshift] = ip[j];
4627e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
463584200bdSSatish Balay       }
464584200bdSSatish Balay     }
465584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
466584200bdSSatish Balay   }
467584200bdSSatish Balay   if (mbs) {
468584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
469584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
470584200bdSSatish Balay   }
471584200bdSSatish Balay   /* reset ilen and imax for each row */
472584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
473584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
474584200bdSSatish Balay   }
475584200bdSSatish Balay   a->nz = (ai[m] + shift)*bs2;
476584200bdSSatish Balay 
477584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
478584200bdSSatish Balay   if (fshift && a->diag) {
479584200bdSSatish Balay     PetscFree(a->diag);
480584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
481584200bdSSatish Balay     a->diag = 0;
482584200bdSSatish Balay   }
483584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
484584200bdSSatish Balay            fshift,a->nz,m);
485584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
486584200bdSSatish Balay            a->reallocs);
487584200bdSSatish Balay   return 0;
488584200bdSSatish Balay }
489584200bdSSatish Balay 
490b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
4912593348eSBarry Smith {
492b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
493de6a44a3SBarry Smith   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
4942593348eSBarry Smith   return 0;
4952593348eSBarry Smith }
4962593348eSBarry Smith 
497b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
4982593348eSBarry Smith {
4992593348eSBarry Smith   Mat         A  = (Mat) obj;
500b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5012593348eSBarry Smith 
5022593348eSBarry Smith #if defined(PETSC_LOG)
5032593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5042593348eSBarry Smith #endif
5052593348eSBarry Smith   PetscFree(a->a);
5062593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5072593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5082593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5092593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5102593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
511de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5122593348eSBarry Smith   PetscFree(a);
513f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
514f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5152593348eSBarry Smith   return 0;
5162593348eSBarry Smith }
5172593348eSBarry Smith 
518b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5192593348eSBarry Smith {
520b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5212593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5222593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5232593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5242593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5252593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5262593348eSBarry Smith   else if (op == ROWS_SORTED ||
5272593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5282593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5292593348eSBarry Smith            op == YES_NEW_DIAGONALS)
53094a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5312593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
532b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5332593348eSBarry Smith   else
534b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5352593348eSBarry Smith   return 0;
5362593348eSBarry Smith }
5372593348eSBarry Smith 
5382593348eSBarry Smith 
5392593348eSBarry Smith /* -------------------------------------------------------*/
5402593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5412593348eSBarry Smith /* -------------------------------------------------------*/
542b6490206SBarry Smith #include "pinclude/plapack.h"
543b6490206SBarry Smith 
544b6490206SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
5452593348eSBarry Smith {
546b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
547b6490206SBarry Smith   Scalar          *xg,*yg;
548b6490206SBarry Smith   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
549b6490206SBarry Smith   register Scalar x1,x2,x3,x4,x5;
550b6490206SBarry Smith   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
551b6490206SBarry Smith   int             bs = a->bs,j,n,bs2 = bs*bs;
5522593348eSBarry Smith 
553b6490206SBarry Smith   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
554b6490206SBarry Smith   PetscMemzero(y,m*sizeof(Scalar));
555b6490206SBarry Smith   x     = x;
5562593348eSBarry Smith   idx   = a->j;
5572593348eSBarry Smith   v     = a->a;
5582593348eSBarry Smith   ii    = a->i;
559b6490206SBarry Smith 
560b6490206SBarry Smith   switch (bs) {
561b6490206SBarry Smith     case 1:
5622593348eSBarry Smith      for ( i=0; i<m; i++ ) {
5632593348eSBarry Smith        n    = ii[1] - ii[0]; ii++;
5642593348eSBarry Smith        sum  = 0.0;
5652593348eSBarry Smith        while (n--) sum += *v++ * x[*idx++];
5662593348eSBarry Smith        y[i] = sum;
5672593348eSBarry Smith       }
5682593348eSBarry Smith       break;
569b6490206SBarry Smith     case 2:
570b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
571de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
572b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0;
573b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
574b6490206SBarry Smith           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
575b6490206SBarry Smith           sum1 += v[0]*x1 + v[2]*x2;
576b6490206SBarry Smith           sum2 += v[1]*x1 + v[3]*x2;
577b6490206SBarry Smith           v += 4;
578b6490206SBarry Smith         }
579b6490206SBarry Smith         y[0] += sum1; y[1] += sum2;
580b6490206SBarry Smith         y += 2;
581b6490206SBarry Smith       }
582b6490206SBarry Smith       break;
583b6490206SBarry Smith     case 3:
584b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
585de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
586b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
587b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
588b6490206SBarry Smith           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
589b6490206SBarry Smith           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
590b6490206SBarry Smith           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
591b6490206SBarry Smith           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
592b6490206SBarry Smith           v += 9;
593b6490206SBarry Smith         }
594b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3;
595b6490206SBarry Smith         y += 3;
596b6490206SBarry Smith       }
597b6490206SBarry Smith       break;
598b6490206SBarry Smith     case 4:
599b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
600de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
601b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
602b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
603b6490206SBarry Smith           xb = x + 4*(*idx++);
604b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
605b6490206SBarry Smith           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
606b6490206SBarry Smith           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
607b6490206SBarry Smith           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
608b6490206SBarry Smith           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
609b6490206SBarry Smith           v += 16;
610b6490206SBarry Smith         }
611b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
612b6490206SBarry Smith         y += 4;
613b6490206SBarry Smith       }
614b6490206SBarry Smith       break;
615b6490206SBarry Smith     case 5:
616b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
617de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
618b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
619b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
620b6490206SBarry Smith           xb = x + 5*(*idx++);
621b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
622b6490206SBarry Smith           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
623b6490206SBarry Smith           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
624b6490206SBarry Smith           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
625b6490206SBarry Smith           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
626b6490206SBarry Smith           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
627b6490206SBarry Smith           v += 25;
628b6490206SBarry Smith         }
629b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
630b6490206SBarry Smith         y += 5;
631b6490206SBarry Smith       }
632b6490206SBarry Smith       break;
633b6490206SBarry Smith       /* block sizes larger then 5 by 5 are handled by BLAS */
634b6490206SBarry Smith     default: {
635de6a44a3SBarry Smith       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
636de6a44a3SBarry Smith       if (!a->mult_work) {
637de6a44a3SBarry Smith         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
638de6a44a3SBarry Smith         CHKPTRQ(a->mult_work);
639de6a44a3SBarry Smith       }
640de6a44a3SBarry Smith       work = a->mult_work;
641b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
642de6a44a3SBarry Smith         n     = ii[1] - ii[0]; ii++;
643de6a44a3SBarry Smith         ncols = n*bs;
644de6a44a3SBarry Smith         workt = work;
645b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
646b6490206SBarry Smith           xb = x + bs*(*idx++);
647de6a44a3SBarry Smith           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
648de6a44a3SBarry Smith           workt += bs;
649b6490206SBarry Smith         }
650de6a44a3SBarry Smith         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
651de6a44a3SBarry Smith         v += n*bs2;
652b6490206SBarry Smith         y += bs;
6532593348eSBarry Smith       }
6542593348eSBarry Smith     }
6552593348eSBarry Smith   }
656b6490206SBarry Smith   PLogFlops(2*bs2*a->nz - m);
6572593348eSBarry Smith   return 0;
6582593348eSBarry Smith }
6592593348eSBarry Smith 
660de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
6612593348eSBarry Smith {
662b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
663bcd2baecSBarry Smith   if (nz)  *nz  = a->bs*a->bs*a->nz;
664bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
665bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
6662593348eSBarry Smith   return 0;
6672593348eSBarry Smith }
6682593348eSBarry Smith 
66991d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
67091d316f6SSatish Balay {
67191d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
67291d316f6SSatish Balay 
67391d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
67491d316f6SSatish Balay 
67591d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
67691d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
67791d316f6SSatish Balay       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
67891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
67991d316f6SSatish Balay   }
68091d316f6SSatish Balay 
68191d316f6SSatish Balay   /* if the a->i are the same */
68291d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
68391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
68491d316f6SSatish Balay   }
68591d316f6SSatish Balay 
68691d316f6SSatish Balay   /* if a->j are the same */
68791d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
68891d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
68991d316f6SSatish Balay   }
69091d316f6SSatish Balay 
69191d316f6SSatish Balay   /* if a->a are the same */
69291d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
69391d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
69491d316f6SSatish Balay   }
69591d316f6SSatish Balay   *flg = PETSC_TRUE;
69691d316f6SSatish Balay   return 0;
69791d316f6SSatish Balay 
69891d316f6SSatish Balay }
69991d316f6SSatish Balay 
70091d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
70191d316f6SSatish Balay {
70291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7037e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
70417e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
70517e48fc4SSatish Balay 
70617e48fc4SSatish Balay   bs   = a->bs;
70717e48fc4SSatish Balay   aa   = a->a;
70817e48fc4SSatish Balay   ai   = a->i;
70917e48fc4SSatish Balay   aj   = a->j;
71017e48fc4SSatish Balay   ambs = a->mbs;
7117e67e3f9SSatish Balay   bs2  = bs*bs;
71291d316f6SSatish Balay 
71391d316f6SSatish Balay   VecSet(&zero,v);
71491d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
7159867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
71617e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
71717e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
71817e48fc4SSatish Balay       if (aj[j] == i) {
71917e48fc4SSatish Balay         row  = i*bs;
7207e67e3f9SSatish Balay         aa_j = aa+j*bs2;
7217e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
72291d316f6SSatish Balay         break;
72391d316f6SSatish Balay       }
72491d316f6SSatish Balay     }
72591d316f6SSatish Balay   }
72691d316f6SSatish Balay   return 0;
72791d316f6SSatish Balay }
72891d316f6SSatish Balay 
7299867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
7309867e207SSatish Balay {
7319867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7329867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
7337e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
7349867e207SSatish Balay 
7359867e207SSatish Balay   ai  = a->i;
7369867e207SSatish Balay   aj  = a->j;
7379867e207SSatish Balay   aa  = a->a;
7389867e207SSatish Balay   m   = a->m;
7399867e207SSatish Balay   n   = a->n;
7409867e207SSatish Balay   bs  = a->bs;
7419867e207SSatish Balay   mbs = a->mbs;
7427e67e3f9SSatish Balay   bs2 = bs*bs;
7439867e207SSatish Balay   if (ll) {
7449867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
7459867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
7469867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
7479867e207SSatish Balay       M  = ai[i+1] - ai[i];
7489867e207SSatish Balay       li = l + i*bs;
7497e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
7509867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
7517e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
7529867e207SSatish Balay           (*v++) *= li[k%bs];
7539867e207SSatish Balay         }
7549867e207SSatish Balay       }
7559867e207SSatish Balay     }
7569867e207SSatish Balay   }
7579867e207SSatish Balay 
7589867e207SSatish Balay   if (rr) {
7599867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
7609867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
7619867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
7629867e207SSatish Balay       M  = ai[i+1] - ai[i];
7637e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
7649867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
7659867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
7669867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
7679867e207SSatish Balay           x = ri[k];
7689867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
7699867e207SSatish Balay         }
7709867e207SSatish Balay       }
7719867e207SSatish Balay     }
7729867e207SSatish Balay   }
7739867e207SSatish Balay   return 0;
7749867e207SSatish Balay }
7759867e207SSatish Balay 
7769867e207SSatish Balay 
777b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
778b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
779b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
7807fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
7817fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
7827fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
7837fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
7847fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
7857fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
7862593348eSBarry Smith 
787b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
7882593348eSBarry Smith {
789b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7902593348eSBarry Smith   Scalar      *v = a->a;
7912593348eSBarry Smith   double      sum = 0.0;
792b6490206SBarry Smith   int         i;
7932593348eSBarry Smith 
7942593348eSBarry Smith   if (type == NORM_FROBENIUS) {
7952593348eSBarry Smith     for (i=0; i<a->nz; i++ ) {
7962593348eSBarry Smith #if defined(PETSC_COMPLEX)
7972593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
7982593348eSBarry Smith #else
7992593348eSBarry Smith       sum += (*v)*(*v); v++;
8002593348eSBarry Smith #endif
8012593348eSBarry Smith     }
8022593348eSBarry Smith     *norm = sqrt(sum);
8032593348eSBarry Smith   }
8042593348eSBarry Smith   else {
805b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
8062593348eSBarry Smith   }
8072593348eSBarry Smith   return 0;
8082593348eSBarry Smith }
8092593348eSBarry Smith 
8102593348eSBarry Smith /*
8112593348eSBarry Smith      note: This can only work for identity for row and col. It would
8122593348eSBarry Smith    be good to check this and otherwise generate an error.
8132593348eSBarry Smith */
814b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
8152593348eSBarry Smith {
816b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
8172593348eSBarry Smith   Mat         outA;
818de6a44a3SBarry Smith   int         ierr;
8192593348eSBarry Smith 
820b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
8212593348eSBarry Smith 
8222593348eSBarry Smith   outA          = inA;
8232593348eSBarry Smith   inA->factor   = FACTOR_LU;
8242593348eSBarry Smith   a->row        = row;
8252593348eSBarry Smith   a->col        = col;
8262593348eSBarry Smith 
827de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
8282593348eSBarry Smith 
8292593348eSBarry Smith   if (!a->diag) {
830b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
8312593348eSBarry Smith   }
8327fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
8332593348eSBarry Smith   return 0;
8342593348eSBarry Smith }
8352593348eSBarry Smith 
836b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
8372593348eSBarry Smith {
838b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
839b6490206SBarry Smith   int         one = 1, totalnz = a->bs*a->bs*a->nz;
840b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
841b6490206SBarry Smith   PLogFlops(totalnz);
8422593348eSBarry Smith   return 0;
8432593348eSBarry Smith }
8442593348eSBarry Smith 
8457e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
8467e67e3f9SSatish Balay {
8477e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8487e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
8497e67e3f9SSatish Balay   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
8507e67e3f9SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
8517e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
8527e67e3f9SSatish Balay 
8537e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
8547e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
8557e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
8567e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
8577e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
8587e67e3f9SSatish Balay     nrow = ailen[brow];
8597e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
8607e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
8617e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
8627e67e3f9SSatish Balay       col  = in[l] - shift;
8637e67e3f9SSatish Balay       bcol = col/bs;
8647e67e3f9SSatish Balay       cidx = col%bs;
8657e67e3f9SSatish Balay       ridx = row%bs;
8667e67e3f9SSatish Balay       high = nrow;
8677e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
8687e67e3f9SSatish Balay       while (high-low > 5) {
8697e67e3f9SSatish Balay         t = (low+high)/2;
8707e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
8717e67e3f9SSatish Balay         else             low  = t;
8727e67e3f9SSatish Balay       }
8737e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
8747e67e3f9SSatish Balay         if (rp[i] > bcol) break;
8757e67e3f9SSatish Balay         if (rp[i] == bcol) {
8767e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
8777e67e3f9SSatish Balay           goto finished;
8787e67e3f9SSatish Balay         }
8797e67e3f9SSatish Balay       }
8807e67e3f9SSatish Balay       *v++ = zero;
8817e67e3f9SSatish Balay       finished:;
8827e67e3f9SSatish Balay     }
8837e67e3f9SSatish Balay   }
8847e67e3f9SSatish Balay   return 0;
8857e67e3f9SSatish Balay }
8867e67e3f9SSatish Balay 
887b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A)
8882593348eSBarry Smith {
8892593348eSBarry Smith   static int called = 0;
8902593348eSBarry Smith 
8912593348eSBarry Smith   if (called) return 0; else called = 1;
8922593348eSBarry Smith   return 0;
8932593348eSBarry Smith }
8942593348eSBarry Smith /* -------------------------------------------------------------------*/
895cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
8969867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
897b6490206SBarry Smith        MatMult_SeqBAIJ,0,
898b6490206SBarry Smith        0,0,
899de6a44a3SBarry Smith        MatSolve_SeqBAIJ,0,
900b6490206SBarry Smith        0,0,
901de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
902b6490206SBarry Smith        0,
903*f2501298SSatish Balay        MatTranspose_SeqBAIJ,
90417e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
9059867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
906584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
907b6490206SBarry Smith        0,
908b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
909b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
9107fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
911b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
912de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
913b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
914b6490206SBarry Smith        0,0,
915b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
916b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
917b6490206SBarry Smith        0,0,
9187e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
919b6490206SBarry Smith        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
920b6490206SBarry Smith        0};
9212593348eSBarry Smith 
9222593348eSBarry Smith /*@C
923b6490206SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
9242593348eSBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
9252593348eSBarry Smith    the user should preallocate the matrix storage by setting the parameter nz
9262593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
9272593348eSBarry Smith    increased by more than a factor of 50.
9282593348eSBarry Smith 
9292593348eSBarry Smith    Input Parameters:
9302593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
931b6490206SBarry Smith .  bs - size of block
9322593348eSBarry Smith .  m - number of rows
9332593348eSBarry Smith .  n - number of columns
934b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
935b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
936b6490206SBarry Smith          (possibly different for each row)
9372593348eSBarry Smith 
9382593348eSBarry Smith    Output Parameter:
9392593348eSBarry Smith .  A - the matrix
9402593348eSBarry Smith 
9412593348eSBarry Smith    Notes:
942b6490206SBarry Smith    The BAIJ format, is fully compatible with standard Fortran 77
9432593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
9442593348eSBarry Smith    either one (as in Fortran) or zero.  See the users manual for details.
9452593348eSBarry Smith 
9462593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
9472593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
9482593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
9492593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
9502593348eSBarry Smith 
9512593348eSBarry Smith .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
9522593348eSBarry Smith @*/
953b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
9542593348eSBarry Smith {
9552593348eSBarry Smith   Mat         B;
956b6490206SBarry Smith   Mat_SeqBAIJ *b;
957*f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
958b6490206SBarry Smith 
959*f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
960*f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
9612593348eSBarry Smith 
9622593348eSBarry Smith   *A = 0;
963b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
9642593348eSBarry Smith   PLogObjectCreate(B);
965b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
9662593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
9677fc0212eSBarry Smith   switch (bs) {
9687fc0212eSBarry Smith     case 1:
9697fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
9707fc0212eSBarry Smith       break;
9714eeb42bcSBarry Smith     case 2:
9724eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
9736d84be18SBarry Smith       break;
974f327dd97SBarry Smith     case 3:
975f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
9764eeb42bcSBarry Smith       break;
9776d84be18SBarry Smith     case 4:
9786d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
9796d84be18SBarry Smith       break;
9806d84be18SBarry Smith     case 5:
9816d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
9826d84be18SBarry Smith       break;
9837fc0212eSBarry Smith   }
984b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
985b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
9862593348eSBarry Smith   B->factor           = 0;
9872593348eSBarry Smith   B->lupivotthreshold = 1.0;
9882593348eSBarry Smith   b->row              = 0;
9892593348eSBarry Smith   b->col              = 0;
9902593348eSBarry Smith   b->reallocs         = 0;
9912593348eSBarry Smith 
9922593348eSBarry Smith   b->m       = m;
993b6490206SBarry Smith   b->mbs     = mbs;
9942593348eSBarry Smith   b->n       = n;
995*f2501298SSatish Balay   b->nbs     = nbs;
996b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
9972593348eSBarry Smith   if (nnz == PETSC_NULL) {
9987e67e3f9SSatish Balay     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
9992593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1000b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1001b6490206SBarry Smith     nz = nz*mbs;
10022593348eSBarry Smith   }
10032593348eSBarry Smith   else {
10042593348eSBarry Smith     nz = 0;
1005b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
10062593348eSBarry Smith   }
10072593348eSBarry Smith 
10082593348eSBarry Smith   /* allocate the matrix space */
10097e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
10102593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
10117e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
10127e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
10132593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
10142593348eSBarry Smith   b->i  = b->j + nz;
10152593348eSBarry Smith   b->singlemalloc = 1;
10162593348eSBarry Smith 
1017de6a44a3SBarry Smith   b->i[0] = 0;
1018b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
10192593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
10202593348eSBarry Smith   }
10212593348eSBarry Smith 
1022b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1023b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1024b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1025b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
10262593348eSBarry Smith 
1027b6490206SBarry Smith   b->bs               = bs;
1028b6490206SBarry Smith   b->mbs              = mbs;
10292593348eSBarry Smith   b->nz               = 0;
10302593348eSBarry Smith   b->maxnz            = nz;
10312593348eSBarry Smith   b->sorted           = 0;
10322593348eSBarry Smith   b->roworiented      = 1;
10332593348eSBarry Smith   b->nonew            = 0;
10342593348eSBarry Smith   b->diag             = 0;
10352593348eSBarry Smith   b->solve_work       = 0;
1036de6a44a3SBarry Smith   b->mult_work        = 0;
10372593348eSBarry Smith   b->spptr            = 0;
10382593348eSBarry Smith 
10392593348eSBarry Smith   *A = B;
10402593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
10412593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
10422593348eSBarry Smith   return 0;
10432593348eSBarry Smith }
10442593348eSBarry Smith 
1045b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
10462593348eSBarry Smith {
10472593348eSBarry Smith   Mat         C;
1048b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
10497e67e3f9SSatish Balay   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1050de6a44a3SBarry Smith 
1051de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
10522593348eSBarry Smith 
10532593348eSBarry Smith   *B = 0;
1054b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
10552593348eSBarry Smith   PLogObjectCreate(C);
1056b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
10572593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1058b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1059b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
10602593348eSBarry Smith   C->factor     = A->factor;
10612593348eSBarry Smith   c->row        = 0;
10622593348eSBarry Smith   c->col        = 0;
10632593348eSBarry Smith   C->assembled  = PETSC_TRUE;
10642593348eSBarry Smith 
10652593348eSBarry Smith   c->m          = a->m;
10662593348eSBarry Smith   c->n          = a->n;
1067b6490206SBarry Smith   c->bs         = a->bs;
1068b6490206SBarry Smith   c->mbs        = a->mbs;
1069b6490206SBarry Smith   c->nbs        = a->nbs;
10702593348eSBarry Smith 
1071b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1072b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1073b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
10742593348eSBarry Smith     c->imax[i] = a->imax[i];
10752593348eSBarry Smith     c->ilen[i] = a->ilen[i];
10762593348eSBarry Smith   }
10772593348eSBarry Smith 
10782593348eSBarry Smith   /* allocate the matrix space */
10792593348eSBarry Smith   c->singlemalloc = 1;
10807e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
10812593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
10827e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1083de6a44a3SBarry Smith   c->i  = c->j + nz;
1084b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1085b6490206SBarry Smith   if (mbs > 0) {
1086de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
10872593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
10887e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
10892593348eSBarry Smith     }
10902593348eSBarry Smith   }
10912593348eSBarry Smith 
1092b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
10932593348eSBarry Smith   c->sorted      = a->sorted;
10942593348eSBarry Smith   c->roworiented = a->roworiented;
10952593348eSBarry Smith   c->nonew       = a->nonew;
10962593348eSBarry Smith 
10972593348eSBarry Smith   if (a->diag) {
1098b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1099b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1100b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
11012593348eSBarry Smith       c->diag[i] = a->diag[i];
11022593348eSBarry Smith     }
11032593348eSBarry Smith   }
11042593348eSBarry Smith   else c->diag          = 0;
11052593348eSBarry Smith   c->nz                 = a->nz;
11062593348eSBarry Smith   c->maxnz              = a->maxnz;
11072593348eSBarry Smith   c->solve_work         = 0;
11082593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
11097fc0212eSBarry Smith   c->mult_work          = 0;
11102593348eSBarry Smith   *B = C;
11112593348eSBarry Smith   return 0;
11122593348eSBarry Smith }
11132593348eSBarry Smith 
111419bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
11152593348eSBarry Smith {
1116b6490206SBarry Smith   Mat_SeqBAIJ  *a;
11172593348eSBarry Smith   Mat          B;
1118de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1119b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
112035aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1121de6a44a3SBarry Smith   int          *masked, nmask,tmp,ishift,bs2;
1122b6490206SBarry Smith   Scalar       *aa;
112319bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
11242593348eSBarry Smith 
112535aab85fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1126de6a44a3SBarry Smith   bs2  = bs*bs;
1127b6490206SBarry Smith 
11282593348eSBarry Smith   MPI_Comm_size(comm,&size);
1129b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
113090ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
113177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1132de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
11332593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
11342593348eSBarry Smith 
1135b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
113635aab85fSBarry Smith 
113735aab85fSBarry Smith   /*
113835aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
113935aab85fSBarry Smith     divisible by the blocksize
114035aab85fSBarry Smith   */
1141b6490206SBarry Smith   mbs        = M/bs;
114235aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
114335aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
114435aab85fSBarry Smith   else                  mbs++;
114535aab85fSBarry Smith   if (extra_rows) {
114635aab85fSBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
114735aab85fSBarry Smith   }
1148b6490206SBarry Smith 
11492593348eSBarry Smith   /* read in row lengths */
115035aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
115177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
115235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
11532593348eSBarry Smith 
1154b6490206SBarry Smith   /* read in column indices */
115535aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
115677c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
115735aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1158b6490206SBarry Smith 
1159b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1160b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1161b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
116235aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
116335aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
116435aab85fSBarry Smith   masked = mask + mbs;
1165b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1166b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
116735aab85fSBarry Smith     nmask = 0;
1168b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1169b6490206SBarry Smith       kmax = rowlengths[rowcount];
1170b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
117135aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
117235aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1173b6490206SBarry Smith       }
1174b6490206SBarry Smith       rowcount++;
1175b6490206SBarry Smith     }
117635aab85fSBarry Smith     browlengths[i] += nmask;
117735aab85fSBarry Smith     /* zero out the mask elements we set */
117835aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1179b6490206SBarry Smith   }
1180b6490206SBarry Smith 
11812593348eSBarry Smith   /* create our matrix */
118235aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
118335aab85fSBarry Smith          CHKERRQ(ierr);
11842593348eSBarry Smith   B = *A;
1185b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
11862593348eSBarry Smith 
11872593348eSBarry Smith   /* set matrix "i" values */
1188de6a44a3SBarry Smith   a->i[0] = 0;
1189b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1190b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1191b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
11922593348eSBarry Smith   }
11939867e207SSatish Balay   a->indexshift = 0;
1194b6490206SBarry Smith   a->nz         = 0;
1195b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
11962593348eSBarry Smith 
1197b6490206SBarry Smith   /* read in nonzero values */
119835aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
119977c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
120035aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1201b6490206SBarry Smith 
1202b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1203b6490206SBarry Smith   nzcount = 0; jcount = 0;
1204b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1205b6490206SBarry Smith     nzcountb = nzcount;
120635aab85fSBarry Smith     nmask    = 0;
1207b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1208b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1209b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
121035aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
121135aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1212b6490206SBarry Smith       }
1213b6490206SBarry Smith       rowcount++;
1214b6490206SBarry Smith     }
1215de6a44a3SBarry Smith     /* sort the masked values */
121677c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1217de6a44a3SBarry Smith 
1218b6490206SBarry Smith     /* set "j" values into matrix */
1219b6490206SBarry Smith     maskcount = 1;
122035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
122135aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1222de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1223b6490206SBarry Smith     }
1224b6490206SBarry Smith     /* set "a" values into matrix */
1225de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1226b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1227b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1228b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1229de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1230de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1231de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1232de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1233b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1234b6490206SBarry Smith       }
1235b6490206SBarry Smith     }
123635aab85fSBarry Smith     /* zero out the mask elements we set */
123735aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1238b6490206SBarry Smith   }
123935aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1240b6490206SBarry Smith 
1241b6490206SBarry Smith   PetscFree(rowlengths);
1242b6490206SBarry Smith   PetscFree(browlengths);
1243b6490206SBarry Smith   PetscFree(aa);
1244b6490206SBarry Smith   PetscFree(jj);
1245b6490206SBarry Smith   PetscFree(mask);
1246b6490206SBarry Smith 
1247b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1248de6a44a3SBarry Smith 
1249de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1250de6a44a3SBarry Smith   if (flg) {
125119bcc07fSBarry Smith     Viewer tviewer;
125219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
125390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
125419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
125519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1256de6a44a3SBarry Smith   }
1257de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1258de6a44a3SBarry Smith   if (flg) {
125919bcc07fSBarry Smith     Viewer tviewer;
126019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
126190ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
126219bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
126319bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1264de6a44a3SBarry Smith   }
1265de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1266de6a44a3SBarry Smith   if (flg) {
126719bcc07fSBarry Smith     Viewer tviewer;
126819bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
126919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
127019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1271de6a44a3SBarry Smith   }
1272de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1273de6a44a3SBarry Smith   if (flg) {
127419bcc07fSBarry Smith     Viewer tviewer;
127519bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
127690ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
127719bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
127819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1279de6a44a3SBarry Smith   }
1280de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1281de6a44a3SBarry Smith   if (flg) {
128219bcc07fSBarry Smith     Viewer tviewer;
128319bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
128419bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
128519bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
128619bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1287de6a44a3SBarry Smith   }
12882593348eSBarry Smith   return 0;
12892593348eSBarry Smith }
12902593348eSBarry Smith 
12912593348eSBarry Smith 
12922593348eSBarry Smith 
1293