xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1073c44794429c6829002c7b9a8d9cb80d0a3d1f)
1b6490206SBarry Smith 
22593348eSBarry Smith #ifndef lint
3*1073c447SSatish Balay static char vcid[] = "$Id: baij.c,v 1.28 1996/04/05 21:52:51 balay Exp balay $";
42593348eSBarry Smith #endif
52593348eSBarry Smith 
62593348eSBarry Smith /*
7b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
82593348eSBarry Smith   matrix storage format.
92593348eSBarry Smith */
10b6490206SBarry Smith #include "baij.h"
112593348eSBarry Smith #include "vec/vecimpl.h"
122593348eSBarry Smith #include "inline/spops.h"
132593348eSBarry Smith #include "petsc.h"
142593348eSBarry Smith 
15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
162593348eSBarry Smith 
17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm)
182593348eSBarry Smith {
19b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
20bcd2baecSBarry Smith   int         ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift;
212593348eSBarry Smith 
222593348eSBarry Smith   /*
232593348eSBarry Smith      this is tacky: In the future when we have written special factorization
242593348eSBarry Smith      and solve routines for the identity permutation we should use a
252593348eSBarry Smith      stride index set instead of the general one.
262593348eSBarry Smith   */
272593348eSBarry Smith   if (type  == ORDER_NATURAL) {
282593348eSBarry Smith     idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx);
292593348eSBarry Smith     for ( i=0; i<n; i++ ) idx[i] = i;
302593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr);
312593348eSBarry Smith     ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr);
322593348eSBarry Smith     PetscFree(idx);
332593348eSBarry Smith     ISSetPermutation(*rperm);
342593348eSBarry Smith     ISSetPermutation(*cperm);
352593348eSBarry Smith     ISSetIdentity(*rperm);
362593348eSBarry Smith     ISSetIdentity(*cperm);
372593348eSBarry Smith     return 0;
382593348eSBarry Smith   }
392593348eSBarry Smith 
40bcd2baecSBarry Smith   MatReorderingRegisterAll();
41bcd2baecSBarry Smith   ishift = a->indexshift;
42bcd2baecSBarry Smith   oshift = -MatReorderingIndexShift[(int)type];
43bcd2baecSBarry Smith   if (MatReorderingRequiresSymmetric[(int)type]) {
44bcd2baecSBarry Smith     ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja);
45bcd2baecSBarry Smith     CHKERRQ(ierr);
46bcd2baecSBarry Smith     ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr);
472593348eSBarry Smith     PetscFree(ia); PetscFree(ja);
48bcd2baecSBarry Smith   } else {
49bcd2baecSBarry Smith     if (ishift == oshift) {
50bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
51bcd2baecSBarry Smith     }
52bcd2baecSBarry Smith     else if (ishift == -1) {
53bcd2baecSBarry Smith       /* temporarily subtract 1 from i and j indices */
54bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
55bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
56bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
57bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
58bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
59bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
60bcd2baecSBarry Smith     } else {
61bcd2baecSBarry Smith       /* temporarily add 1 to i and j indices */
62bcd2baecSBarry Smith       int nz = a->i[a->n] - 1;
63bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]++;
64bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]++;
65bcd2baecSBarry Smith       ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr);
66bcd2baecSBarry Smith       for ( i=0; i<nz; i++ ) a->j[i]--;
67bcd2baecSBarry Smith       for ( i=0; i<a->n+1; i++ ) a->i[i]--;
68bcd2baecSBarry Smith     }
69bcd2baecSBarry Smith   }
702593348eSBarry Smith   return 0;
712593348eSBarry Smith }
722593348eSBarry Smith 
73de6a44a3SBarry Smith /*
74de6a44a3SBarry Smith      Adds diagonal pointers to sparse matrix structure.
75de6a44a3SBarry Smith */
76de6a44a3SBarry Smith 
77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A)
78de6a44a3SBarry Smith {
79de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
807fc0212eSBarry Smith   int         i,j, *diag, m = a->mbs;
81de6a44a3SBarry Smith 
82de6a44a3SBarry Smith   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
83de6a44a3SBarry Smith   PLogObjectMemory(A,(m+1)*sizeof(int));
847fc0212eSBarry Smith   for ( i=0; i<m; i++ ) {
85de6a44a3SBarry Smith     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
86de6a44a3SBarry Smith       if (a->j[j] == i) {
87de6a44a3SBarry Smith         diag[i] = j;
88de6a44a3SBarry Smith         break;
89de6a44a3SBarry Smith       }
90de6a44a3SBarry Smith     }
91de6a44a3SBarry Smith   }
92de6a44a3SBarry Smith   a->diag = diag;
93de6a44a3SBarry Smith   return 0;
94de6a44a3SBarry Smith }
952593348eSBarry Smith 
962593348eSBarry Smith #include "draw.h"
972593348eSBarry Smith #include "pinclude/pviewer.h"
9877c4ece6SBarry Smith #include "sys.h"
992593348eSBarry Smith 
100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
1012593348eSBarry Smith {
102b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1037e67e3f9SSatish Balay   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs;
104b6490206SBarry Smith   Scalar      *aa;
1052593348eSBarry Smith 
10690ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1072593348eSBarry Smith   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
1082593348eSBarry Smith   col_lens[0] = MAT_COOKIE;
1092593348eSBarry Smith   col_lens[1] = a->m;
1102593348eSBarry Smith   col_lens[2] = a->n;
1117e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
1122593348eSBarry Smith 
1132593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
114b6490206SBarry Smith   count = 0;
115b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
116b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
117b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
118b6490206SBarry Smith     }
1192593348eSBarry Smith   }
12077c4ece6SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
1212593348eSBarry Smith   PetscFree(col_lens);
1222593348eSBarry Smith 
1232593348eSBarry Smith   /* store column indices (zero start index) */
1247e67e3f9SSatish Balay   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
125b6490206SBarry Smith   count = 0;
126b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
127b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
128b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
129b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
130b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
1312593348eSBarry Smith         }
1322593348eSBarry Smith       }
133b6490206SBarry Smith     }
134b6490206SBarry Smith   }
1357e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
136b6490206SBarry Smith   PetscFree(jj);
1372593348eSBarry Smith 
1382593348eSBarry Smith   /* store nonzero values */
1397e67e3f9SSatish Balay   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
140b6490206SBarry Smith   count = 0;
141b6490206SBarry Smith   for ( i=0; i<a->mbs; i++ ) {
142b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
143b6490206SBarry Smith       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
144b6490206SBarry Smith         for ( l=0; l<bs; l++ ) {
1457e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
146b6490206SBarry Smith         }
147b6490206SBarry Smith       }
148b6490206SBarry Smith     }
149b6490206SBarry Smith   }
1507e67e3f9SSatish Balay   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
151b6490206SBarry Smith   PetscFree(aa);
1522593348eSBarry Smith   return 0;
1532593348eSBarry Smith }
1542593348eSBarry Smith 
155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
1562593348eSBarry Smith {
157b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1587e67e3f9SSatish Balay   int         ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs;
1592593348eSBarry Smith   FILE        *fd;
1602593348eSBarry Smith   char        *outputname;
1612593348eSBarry Smith 
16290ace30eSBarry Smith   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1632593348eSBarry Smith   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
16490ace30eSBarry Smith   ierr = ViewerGetFormat(viewer,&format);
16590ace30eSBarry Smith   if (format == ASCII_FORMAT_INFO) {
1662593348eSBarry Smith     /* no need to print additional information */ ;
1672593348eSBarry Smith   }
16890ace30eSBarry Smith   else if (format == ASCII_FORMAT_MATLAB) {
169b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
1702593348eSBarry Smith   }
1712593348eSBarry Smith   else {
172b6490206SBarry Smith     for ( i=0; i<a->mbs; i++ ) {
173b6490206SBarry Smith       for ( j=0; j<bs; j++ ) {
174b6490206SBarry Smith         fprintf(fd,"row %d:",i*bs+j);
175b6490206SBarry Smith         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
176b6490206SBarry Smith           for ( l=0; l<bs; l++ ) {
17788685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX)
1787e67e3f9SSatish Balay           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
17988685aaeSLois Curfman McInnes             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
1807e67e3f9SSatish Balay               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
18188685aaeSLois Curfman McInnes           }
18288685aaeSLois Curfman McInnes           else {
1837e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
18488685aaeSLois Curfman McInnes           }
18588685aaeSLois Curfman McInnes #else
1867e67e3f9SSatish Balay             fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
18788685aaeSLois Curfman McInnes #endif
1882593348eSBarry Smith           }
1892593348eSBarry Smith         }
1902593348eSBarry Smith         fprintf(fd,"\n");
1912593348eSBarry Smith       }
1922593348eSBarry Smith     }
193b6490206SBarry Smith   }
1942593348eSBarry Smith   fflush(fd);
1952593348eSBarry Smith   return 0;
1962593348eSBarry Smith }
1972593348eSBarry Smith 
198b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
1992593348eSBarry Smith {
2002593348eSBarry Smith   Mat         A = (Mat) obj;
20119bcc07fSBarry Smith   ViewerType  vtype;
20219bcc07fSBarry Smith   int         ierr;
2032593348eSBarry Smith 
2042593348eSBarry Smith   if (!viewer) {
20519bcc07fSBarry Smith     viewer = STDOUT_VIEWER_SELF;
2062593348eSBarry Smith   }
20719bcc07fSBarry Smith 
20819bcc07fSBarry Smith   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
20919bcc07fSBarry Smith   if (vtype == MATLAB_VIEWER) {
210b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
2112593348eSBarry Smith   }
21219bcc07fSBarry Smith   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
213b6490206SBarry Smith     return MatView_SeqBAIJ_ASCII(A,viewer);
2142593348eSBarry Smith   }
21519bcc07fSBarry Smith   else if (vtype == BINARY_FILE_VIEWER) {
216b6490206SBarry Smith     return MatView_SeqBAIJ_Binary(A,viewer);
2172593348eSBarry Smith   }
21819bcc07fSBarry Smith   else if (vtype == DRAW_VIEWER) {
219b6490206SBarry Smith     SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported");
2202593348eSBarry Smith   }
2212593348eSBarry Smith   return 0;
2222593348eSBarry Smith }
223b6490206SBarry Smith 
2247e67e3f9SSatish Balay #define CHUNKSIZE  1
225cd0e1443SSatish Balay 
226cd0e1443SSatish Balay /* This version has row oriented v  */
227cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
228cd0e1443SSatish Balay {
229cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
230cd0e1443SSatish Balay   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
231cd0e1443SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
232cd0e1443SSatish Balay   int         *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol;
2337e67e3f9SSatish Balay   int          ridx,cidx,bs2=bs*bs;
234cd0e1443SSatish Balay   Scalar      *ap,value,*aa=a->a,*bap;
235cd0e1443SSatish Balay 
236cd0e1443SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over added rows */
237cd0e1443SSatish Balay     row  = im[k]; brow = row/bs;
238cd0e1443SSatish Balay     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
239cd0e1443SSatish Balay     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
2407e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
241cd0e1443SSatish Balay     rmax = imax[brow]; nrow = ailen[brow];
242cd0e1443SSatish Balay     low = 0;
243cd0e1443SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over added columns */
244cd0e1443SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
245cd0e1443SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
246cd0e1443SSatish Balay       col = in[l] - shift; bcol = col/bs;
247cd0e1443SSatish Balay       ridx = row % bs; cidx = col % bs;
248cd0e1443SSatish Balay       if (roworiented) {
249cd0e1443SSatish Balay         value = *v++;
250cd0e1443SSatish Balay       }
251cd0e1443SSatish Balay       else {
252cd0e1443SSatish Balay         value = v[k + l*m];
253cd0e1443SSatish Balay       }
254cd0e1443SSatish Balay       if (!sorted) low = 0; high = nrow;
255cd0e1443SSatish Balay       while (high-low > 5) {
256cd0e1443SSatish Balay         t = (low+high)/2;
257cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
258cd0e1443SSatish Balay         else             low  = t;
259cd0e1443SSatish Balay       }
260cd0e1443SSatish Balay       for ( i=low; i<high; i++ ) {
261cd0e1443SSatish Balay         if (rp[i] > bcol) break;
262cd0e1443SSatish Balay         if (rp[i] == bcol) {
2637e67e3f9SSatish Balay           bap  = ap +  bs2*i + bs*cidx + ridx;
264cd0e1443SSatish Balay           if (is == ADD_VALUES) *bap += value;
265cd0e1443SSatish Balay           else                  *bap  = value;
266cd0e1443SSatish Balay           goto noinsert;
267cd0e1443SSatish Balay         }
268cd0e1443SSatish Balay       }
269cd0e1443SSatish Balay       if (nonew) goto noinsert;
270cd0e1443SSatish Balay       if (nrow >= rmax) {
271cd0e1443SSatish Balay         /* there is no extra room in row, therefore enlarge */
272cd0e1443SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
273cd0e1443SSatish Balay         Scalar *new_a;
274cd0e1443SSatish Balay 
275cd0e1443SSatish Balay         /* malloc new storage space */
2767e67e3f9SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
277cd0e1443SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
2787e67e3f9SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz);
279cd0e1443SSatish Balay         new_i   = new_j + new_nz;
280cd0e1443SSatish Balay 
281cd0e1443SSatish Balay         /* copy over old data into new slots */
282cd0e1443SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
2837e67e3f9SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
2847e67e3f9SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int));
285cd0e1443SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift);
286cd0e1443SSatish Balay         PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow,
287cd0e1443SSatish Balay                                                            len*sizeof(int));
2887e67e3f9SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar));
2897e67e3f9SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
2907e67e3f9SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE),
2917e67e3f9SSatish Balay                     aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar));
292cd0e1443SSatish Balay         /* free up old matrix storage */
293cd0e1443SSatish Balay         PetscFree(a->a);
294cd0e1443SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
295cd0e1443SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
296cd0e1443SSatish Balay         a->singlemalloc = 1;
297cd0e1443SSatish Balay 
2987e67e3f9SSatish Balay         rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
299cd0e1443SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
3007e67e3f9SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
3017e67e3f9SSatish Balay         a->maxnz += bs2*CHUNKSIZE;
302cd0e1443SSatish Balay         a->reallocs++;
3037e67e3f9SSatish Balay         a->nz+=bs2;
304cd0e1443SSatish Balay       }
3057e67e3f9SSatish Balay       N = nrow++ - 1;
306cd0e1443SSatish Balay       /* shift up all the later entries in this row */
307cd0e1443SSatish Balay       for ( ii=N; ii>=i; ii-- ) {
308cd0e1443SSatish Balay         rp[ii+1] = rp[ii];
3097e67e3f9SSatish Balay          PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
310cd0e1443SSatish Balay       }
3117e67e3f9SSatish Balay       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
312cd0e1443SSatish Balay       rp[i] = bcol;
3137e67e3f9SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
314cd0e1443SSatish Balay       noinsert:;
315cd0e1443SSatish Balay       low = i;
316cd0e1443SSatish Balay     }
317cd0e1443SSatish Balay     ailen[brow] = nrow;
318cd0e1443SSatish Balay   }
319cd0e1443SSatish Balay   return 0;
320cd0e1443SSatish Balay }
321cd0e1443SSatish Balay 
3220b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
3230b824a48SSatish Balay {
3240b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3250b824a48SSatish Balay   *m = a->m; *n = a->n;
3260b824a48SSatish Balay   return 0;
3270b824a48SSatish Balay }
3280b824a48SSatish Balay 
3290b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
3300b824a48SSatish Balay {
3310b824a48SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3320b824a48SSatish Balay   *m = 0; *n = a->m;
3330b824a48SSatish Balay   return 0;
3340b824a48SSatish Balay }
3350b824a48SSatish Balay 
3369867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3379867e207SSatish Balay {
3389867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
3397e67e3f9SSatish Balay   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
3409867e207SSatish Balay   Scalar      *aa,*v_i,*aa_i;
3419867e207SSatish Balay 
3429867e207SSatish Balay   bs  = a->bs;
3439867e207SSatish Balay   ai  = a->i;
3449867e207SSatish Balay   aj  = a->j;
3459867e207SSatish Balay   aa  = a->a;
3467e67e3f9SSatish Balay   bs2 = bs*bs;
3479867e207SSatish Balay 
3489867e207SSatish Balay   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
3499867e207SSatish Balay 
3509867e207SSatish Balay   bn  = row/bs;   /* Block number */
3519867e207SSatish Balay   bp  = row % bs; /* Block Position */
3529867e207SSatish Balay   M   = ai[bn+1] - ai[bn];
3539867e207SSatish Balay   *nz = bs*M;
3549867e207SSatish Balay 
3559867e207SSatish Balay   if (v) {
3569867e207SSatish Balay     *v = 0;
3579867e207SSatish Balay     if (*nz) {
3589867e207SSatish Balay       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
3599867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3609867e207SSatish Balay         v_i  = *v + i*bs;
3617e67e3f9SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
3627e67e3f9SSatish Balay         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
3639867e207SSatish Balay       }
3649867e207SSatish Balay     }
3659867e207SSatish Balay   }
3669867e207SSatish Balay 
3679867e207SSatish Balay   if (idx) {
3689867e207SSatish Balay     *idx = 0;
3699867e207SSatish Balay     if (*nz) {
3709867e207SSatish Balay       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
3719867e207SSatish Balay       for ( i=0; i<M; i++ ) { /* for each block in the block row */
3729867e207SSatish Balay         idx_i = *idx + i*bs;
3739867e207SSatish Balay         itmp  = bs*aj[ai[bn] + i];
3749867e207SSatish Balay         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
3759867e207SSatish Balay       }
3769867e207SSatish Balay     }
3779867e207SSatish Balay   }
3789867e207SSatish Balay   return 0;
3799867e207SSatish Balay }
3809867e207SSatish Balay 
3819867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
3829867e207SSatish Balay {
3839867e207SSatish Balay   if (idx) {if (*idx) PetscFree(*idx);}
3849867e207SSatish Balay   if (v)   {if (*v)   PetscFree(*v);}
3859867e207SSatish Balay   return 0;
3869867e207SSatish Balay }
387b6490206SBarry Smith 
388f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
389f2501298SSatish Balay {
390f2501298SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
391f2501298SSatish Balay   Mat         C;
392f2501298SSatish Balay   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
393f2501298SSatish Balay   int         shift=a->indexshift,*rows,*cols,bs2=bs*bs;
394f2501298SSatish Balay   Scalar      *array=a->a;
395f2501298SSatish Balay 
396f2501298SSatish Balay   if (B==PETSC_NULL && mbs!=nbs)
397f2501298SSatish Balay     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
398f2501298SSatish Balay   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
399f2501298SSatish Balay   PetscMemzero(col,(1+nbs)*sizeof(int));
400f2501298SSatish Balay   if (shift) {
401f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1;
402f2501298SSatish Balay   }
403f2501298SSatish Balay   for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1;
404f2501298SSatish Balay   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
405f2501298SSatish Balay   PetscFree(col);
406f2501298SSatish Balay   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
407f2501298SSatish Balay   cols = rows + bs;
408f2501298SSatish Balay   for ( i=0; i<mbs; i++ ) {
409f2501298SSatish Balay     cols[0] = i*bs;
410f2501298SSatish Balay     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
411f2501298SSatish Balay     len = ai[i+1] - ai[i];
412f2501298SSatish Balay     for ( j=0; j<len; j++ ) {
413f2501298SSatish Balay       rows[0] = (*aj++)*bs;
414f2501298SSatish Balay       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
415f2501298SSatish Balay       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
416f2501298SSatish Balay       array += bs2;
417f2501298SSatish Balay     }
418f2501298SSatish Balay   }
419*1073c447SSatish Balay   PetscFree(rows);
420f2501298SSatish Balay   if (shift) {
421f2501298SSatish Balay     for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1;
422f2501298SSatish Balay   }
423f2501298SSatish Balay 
424f2501298SSatish Balay   ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
425f2501298SSatish Balay   ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr);
426f2501298SSatish Balay 
427f2501298SSatish Balay   if (B != PETSC_NULL) {
428f2501298SSatish Balay     *B = C;
429f2501298SSatish Balay   } else {
430f2501298SSatish Balay     /* This isn't really an in-place transpose */
431f2501298SSatish Balay     PetscFree(a->a);
432f2501298SSatish Balay     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
433f2501298SSatish Balay     if (a->diag) PetscFree(a->diag);
434f2501298SSatish Balay     if (a->ilen) PetscFree(a->ilen);
435f2501298SSatish Balay     if (a->imax) PetscFree(a->imax);
436f2501298SSatish Balay     if (a->solve_work) PetscFree(a->solve_work);
437f2501298SSatish Balay     PetscFree(a);
438f2501298SSatish Balay     PetscMemcpy(A,C,sizeof(struct _Mat));
439f2501298SSatish Balay     PetscHeaderDestroy(C);
440f2501298SSatish Balay   }
441f2501298SSatish Balay   return 0;
442f2501298SSatish Balay }
443f2501298SSatish Balay 
444f2501298SSatish Balay 
445584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
446584200bdSSatish Balay {
447584200bdSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
448584200bdSSatish Balay   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
449584200bdSSatish Balay   int        m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift;
450584200bdSSatish Balay   int        bs = a->bs,  mbs = a->mbs, bs2 = bs*bs;
451584200bdSSatish Balay   Scalar     *aa = a->a, *ap;
452584200bdSSatish Balay 
453584200bdSSatish Balay   if (mode == FLUSH_ASSEMBLY) return 0;
454584200bdSSatish Balay 
455584200bdSSatish Balay   for ( i=1; i<mbs; i++ ) {
456584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
457584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
458584200bdSSatish Balay     if (fshift) {
459584200bdSSatish Balay       ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift;
460584200bdSSatish Balay       N = ailen[i];
461584200bdSSatish Balay       for ( j=0; j<N; j++ ) {
462584200bdSSatish Balay         ip[j-fshift] = ip[j];
4637e67e3f9SSatish Balay         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
464584200bdSSatish Balay       }
465584200bdSSatish Balay     }
466584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
467584200bdSSatish Balay   }
468584200bdSSatish Balay   if (mbs) {
469584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
470584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
471584200bdSSatish Balay   }
472584200bdSSatish Balay   /* reset ilen and imax for each row */
473584200bdSSatish Balay   for ( i=0; i<mbs; i++ ) {
474584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
475584200bdSSatish Balay   }
476584200bdSSatish Balay   a->nz = (ai[m] + shift)*bs2;
477584200bdSSatish Balay 
478584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
479584200bdSSatish Balay   if (fshift && a->diag) {
480584200bdSSatish Balay     PetscFree(a->diag);
481584200bdSSatish Balay     PLogObjectMemory(A,-(m+1)*sizeof(int));
482584200bdSSatish Balay     a->diag = 0;
483584200bdSSatish Balay   }
484584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n",
485584200bdSSatish Balay            fshift,a->nz,m);
486584200bdSSatish Balay   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
487584200bdSSatish Balay            a->reallocs);
488584200bdSSatish Balay   return 0;
489584200bdSSatish Balay }
490584200bdSSatish Balay 
491b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A)
4922593348eSBarry Smith {
493b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
494de6a44a3SBarry Smith   PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar));
4952593348eSBarry Smith   return 0;
4962593348eSBarry Smith }
4972593348eSBarry Smith 
498b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj)
4992593348eSBarry Smith {
5002593348eSBarry Smith   Mat         A  = (Mat) obj;
501b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5022593348eSBarry Smith 
5032593348eSBarry Smith #if defined(PETSC_LOG)
5042593348eSBarry Smith   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
5052593348eSBarry Smith #endif
5062593348eSBarry Smith   PetscFree(a->a);
5072593348eSBarry Smith   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
5082593348eSBarry Smith   if (a->diag) PetscFree(a->diag);
5092593348eSBarry Smith   if (a->ilen) PetscFree(a->ilen);
5102593348eSBarry Smith   if (a->imax) PetscFree(a->imax);
5112593348eSBarry Smith   if (a->solve_work) PetscFree(a->solve_work);
512de6a44a3SBarry Smith   if (a->mult_work) PetscFree(a->mult_work);
5132593348eSBarry Smith   PetscFree(a);
514f2655603SLois Curfman McInnes   PLogObjectDestroy(A);
515f2655603SLois Curfman McInnes   PetscHeaderDestroy(A);
5162593348eSBarry Smith   return 0;
5172593348eSBarry Smith }
5182593348eSBarry Smith 
519b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
5202593348eSBarry Smith {
521b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
5222593348eSBarry Smith   if      (op == ROW_ORIENTED)              a->roworiented = 1;
5232593348eSBarry Smith   else if (op == COLUMN_ORIENTED)           a->roworiented = 0;
5242593348eSBarry Smith   else if (op == COLUMNS_SORTED)            a->sorted      = 1;
5252593348eSBarry Smith   else if (op == NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
5262593348eSBarry Smith   else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
5272593348eSBarry Smith   else if (op == ROWS_SORTED ||
5282593348eSBarry Smith            op == SYMMETRIC_MATRIX ||
5292593348eSBarry Smith            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
5302593348eSBarry Smith            op == YES_NEW_DIAGONALS)
53194a424c1SBarry Smith     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
5322593348eSBarry Smith   else if (op == NO_NEW_DIAGONALS)
533b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");}
5342593348eSBarry Smith   else
535b6490206SBarry Smith     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
5362593348eSBarry Smith   return 0;
5372593348eSBarry Smith }
5382593348eSBarry Smith 
5392593348eSBarry Smith 
5402593348eSBarry Smith /* -------------------------------------------------------*/
5412593348eSBarry Smith /* Should check that shapes of vectors and matrices match */
5422593348eSBarry Smith /* -------------------------------------------------------*/
543b6490206SBarry Smith #include "pinclude/plapack.h"
544b6490206SBarry Smith 
545b6490206SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy)
5462593348eSBarry Smith {
547b6490206SBarry Smith   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
548b6490206SBarry Smith   Scalar          *xg,*yg;
549b6490206SBarry Smith   register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5;
550b6490206SBarry Smith   register Scalar x1,x2,x3,x4,x5;
551b6490206SBarry Smith   int             mbs = a->mbs, m = a->m, i, *idx,*ii;
552b6490206SBarry Smith   int             bs = a->bs,j,n,bs2 = bs*bs;
5532593348eSBarry Smith 
554b6490206SBarry Smith   VecGetArray(xx,&xg); x = xg;  VecGetArray(yy,&yg); y = yg;
555b6490206SBarry Smith   PetscMemzero(y,m*sizeof(Scalar));
556b6490206SBarry Smith   x     = x;
5572593348eSBarry Smith   idx   = a->j;
5582593348eSBarry Smith   v     = a->a;
5592593348eSBarry Smith   ii    = a->i;
560b6490206SBarry Smith 
561b6490206SBarry Smith   switch (bs) {
562b6490206SBarry Smith     case 1:
5632593348eSBarry Smith      for ( i=0; i<m; i++ ) {
5642593348eSBarry Smith        n    = ii[1] - ii[0]; ii++;
5652593348eSBarry Smith        sum  = 0.0;
5662593348eSBarry Smith        while (n--) sum += *v++ * x[*idx++];
5672593348eSBarry Smith        y[i] = sum;
5682593348eSBarry Smith       }
5692593348eSBarry Smith       break;
570b6490206SBarry Smith     case 2:
571b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
572de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
573b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0;
574b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
575b6490206SBarry Smith           xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
576b6490206SBarry Smith           sum1 += v[0]*x1 + v[2]*x2;
577b6490206SBarry Smith           sum2 += v[1]*x1 + v[3]*x2;
578b6490206SBarry Smith           v += 4;
579b6490206SBarry Smith         }
580b6490206SBarry Smith         y[0] += sum1; y[1] += sum2;
581b6490206SBarry Smith         y += 2;
582b6490206SBarry Smith       }
583b6490206SBarry Smith       break;
584b6490206SBarry Smith     case 3:
585b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
586de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
587b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
588b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
589b6490206SBarry Smith           xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
590b6490206SBarry Smith           sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
591b6490206SBarry Smith           sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
592b6490206SBarry Smith           sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
593b6490206SBarry Smith           v += 9;
594b6490206SBarry Smith         }
595b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3;
596b6490206SBarry Smith         y += 3;
597b6490206SBarry Smith       }
598b6490206SBarry Smith       break;
599b6490206SBarry Smith     case 4:
600b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
601de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
602b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
603b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
604b6490206SBarry Smith           xb = x + 4*(*idx++);
605b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
606b6490206SBarry Smith           sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
607b6490206SBarry Smith           sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
608b6490206SBarry Smith           sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
609b6490206SBarry Smith           sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
610b6490206SBarry Smith           v += 16;
611b6490206SBarry Smith         }
612b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4;
613b6490206SBarry Smith         y += 4;
614b6490206SBarry Smith       }
615b6490206SBarry Smith       break;
616b6490206SBarry Smith     case 5:
617b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
618de6a44a3SBarry Smith         n  = ii[1] - ii[0]; ii++;
619b6490206SBarry Smith         sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
620b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
621b6490206SBarry Smith           xb = x + 5*(*idx++);
622b6490206SBarry Smith           x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
623b6490206SBarry Smith           sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
624b6490206SBarry Smith           sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
625b6490206SBarry Smith           sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
626b6490206SBarry Smith           sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
627b6490206SBarry Smith           sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
628b6490206SBarry Smith           v += 25;
629b6490206SBarry Smith         }
630b6490206SBarry Smith         y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5;
631b6490206SBarry Smith         y += 5;
632b6490206SBarry Smith       }
633b6490206SBarry Smith       break;
634b6490206SBarry Smith       /* block sizes larger then 5 by 5 are handled by BLAS */
635b6490206SBarry Smith     default: {
636de6a44a3SBarry Smith       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
637de6a44a3SBarry Smith       if (!a->mult_work) {
638de6a44a3SBarry Smith         a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar));
639de6a44a3SBarry Smith         CHKPTRQ(a->mult_work);
640de6a44a3SBarry Smith       }
641de6a44a3SBarry Smith       work = a->mult_work;
642b6490206SBarry Smith       for ( i=0; i<mbs; i++ ) {
643de6a44a3SBarry Smith         n     = ii[1] - ii[0]; ii++;
644de6a44a3SBarry Smith         ncols = n*bs;
645de6a44a3SBarry Smith         workt = work;
646b6490206SBarry Smith         for ( j=0; j<n; j++ ) {
647b6490206SBarry Smith           xb = x + bs*(*idx++);
648de6a44a3SBarry Smith           for ( k=0; k<bs; k++ ) workt[k] = xb[k];
649de6a44a3SBarry Smith           workt += bs;
650b6490206SBarry Smith         }
651de6a44a3SBarry Smith         LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One);
652de6a44a3SBarry Smith         v += n*bs2;
653b6490206SBarry Smith         y += bs;
6542593348eSBarry Smith       }
6552593348eSBarry Smith     }
6562593348eSBarry Smith   }
657b6490206SBarry Smith   PLogFlops(2*bs2*a->nz - m);
6582593348eSBarry Smith   return 0;
6592593348eSBarry Smith }
6602593348eSBarry Smith 
661de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem)
6622593348eSBarry Smith {
663b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
664bcd2baecSBarry Smith   if (nz)  *nz  = a->bs*a->bs*a->nz;
665bcd2baecSBarry Smith   if (nza) *nza = a->maxnz;
666bcd2baecSBarry Smith   if (mem) *mem = (int)A->mem;
6672593348eSBarry Smith   return 0;
6682593348eSBarry Smith }
6692593348eSBarry Smith 
67091d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
67191d316f6SSatish Balay {
67291d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
67391d316f6SSatish Balay 
67491d316f6SSatish Balay   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
67591d316f6SSatish Balay 
67691d316f6SSatish Balay   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
67791d316f6SSatish Balay   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
67891d316f6SSatish Balay       (a->nz != b->nz)||(a->indexshift != b->indexshift)) {
67991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
68091d316f6SSatish Balay   }
68191d316f6SSatish Balay 
68291d316f6SSatish Balay   /* if the a->i are the same */
68391d316f6SSatish Balay   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
68491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
68591d316f6SSatish Balay   }
68691d316f6SSatish Balay 
68791d316f6SSatish Balay   /* if a->j are the same */
68891d316f6SSatish Balay   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
68991d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
69091d316f6SSatish Balay   }
69191d316f6SSatish Balay 
69291d316f6SSatish Balay   /* if a->a are the same */
69391d316f6SSatish Balay   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
69491d316f6SSatish Balay     *flg = PETSC_FALSE; return 0;
69591d316f6SSatish Balay   }
69691d316f6SSatish Balay   *flg = PETSC_TRUE;
69791d316f6SSatish Balay   return 0;
69891d316f6SSatish Balay 
69991d316f6SSatish Balay }
70091d316f6SSatish Balay 
70191d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
70291d316f6SSatish Balay {
70391d316f6SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7047e67e3f9SSatish Balay   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
70517e48fc4SSatish Balay   Scalar      *x, zero = 0.0,*aa,*aa_j;
70617e48fc4SSatish Balay 
70717e48fc4SSatish Balay   bs   = a->bs;
70817e48fc4SSatish Balay   aa   = a->a;
70917e48fc4SSatish Balay   ai   = a->i;
71017e48fc4SSatish Balay   aj   = a->j;
71117e48fc4SSatish Balay   ambs = a->mbs;
7127e67e3f9SSatish Balay   bs2  = bs*bs;
71391d316f6SSatish Balay 
71491d316f6SSatish Balay   VecSet(&zero,v);
71591d316f6SSatish Balay   VecGetArray(v,&x); VecGetLocalSize(v,&n);
7169867e207SSatish Balay   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
71717e48fc4SSatish Balay   for ( i=0; i<ambs; i++ ) {
71817e48fc4SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
71917e48fc4SSatish Balay       if (aj[j] == i) {
72017e48fc4SSatish Balay         row  = i*bs;
7217e67e3f9SSatish Balay         aa_j = aa+j*bs2;
7227e67e3f9SSatish Balay         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
72391d316f6SSatish Balay         break;
72491d316f6SSatish Balay       }
72591d316f6SSatish Balay     }
72691d316f6SSatish Balay   }
72791d316f6SSatish Balay   return 0;
72891d316f6SSatish Balay }
72991d316f6SSatish Balay 
7309867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
7319867e207SSatish Balay {
7329867e207SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7339867e207SSatish Balay   Scalar      *l,*r,x,*v,*aa,*li,*ri;
7347e67e3f9SSatish Balay   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
7359867e207SSatish Balay 
7369867e207SSatish Balay   ai  = a->i;
7379867e207SSatish Balay   aj  = a->j;
7389867e207SSatish Balay   aa  = a->a;
7399867e207SSatish Balay   m   = a->m;
7409867e207SSatish Balay   n   = a->n;
7419867e207SSatish Balay   bs  = a->bs;
7429867e207SSatish Balay   mbs = a->mbs;
7437e67e3f9SSatish Balay   bs2 = bs*bs;
7449867e207SSatish Balay   if (ll) {
7459867e207SSatish Balay     VecGetArray(ll,&l); VecGetSize(ll,&lm);
7469867e207SSatish Balay     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
7479867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
7489867e207SSatish Balay       M  = ai[i+1] - ai[i];
7499867e207SSatish Balay       li = l + i*bs;
7507e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
7519867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
7527e67e3f9SSatish Balay         for ( k=0; k<bs2; k++ ) {
7539867e207SSatish Balay           (*v++) *= li[k%bs];
7549867e207SSatish Balay         }
7559867e207SSatish Balay       }
7569867e207SSatish Balay     }
7579867e207SSatish Balay   }
7589867e207SSatish Balay 
7599867e207SSatish Balay   if (rr) {
7609867e207SSatish Balay     VecGetArray(rr,&r); VecGetSize(rr,&rn);
7619867e207SSatish Balay     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
7629867e207SSatish Balay     for ( i=0; i<mbs; i++ ) { /* for each block row */
7639867e207SSatish Balay       M  = ai[i+1] - ai[i];
7647e67e3f9SSatish Balay       v  = aa + bs2*ai[i];
7659867e207SSatish Balay       for ( j=0; j<M; j++ ) { /* for each block */
7669867e207SSatish Balay         ri = r + bs*aj[ai[i]+j];
7679867e207SSatish Balay         for ( k=0; k<bs; k++ ) {
7689867e207SSatish Balay           x = ri[k];
7699867e207SSatish Balay           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
7709867e207SSatish Balay         }
7719867e207SSatish Balay       }
7729867e207SSatish Balay     }
7739867e207SSatish Balay   }
7749867e207SSatish Balay   return 0;
7759867e207SSatish Balay }
7769867e207SSatish Balay 
7779867e207SSatish Balay 
778b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
779b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
780b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec);
7817fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
7827fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
7837fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
7847fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
7857fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
7867fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
7872593348eSBarry Smith 
788b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
7892593348eSBarry Smith {
790b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
7912593348eSBarry Smith   Scalar      *v = a->a;
7922593348eSBarry Smith   double      sum = 0.0;
793b6490206SBarry Smith   int         i;
7942593348eSBarry Smith 
7952593348eSBarry Smith   if (type == NORM_FROBENIUS) {
7962593348eSBarry Smith     for (i=0; i<a->nz; i++ ) {
7972593348eSBarry Smith #if defined(PETSC_COMPLEX)
7982593348eSBarry Smith       sum += real(conj(*v)*(*v)); v++;
7992593348eSBarry Smith #else
8002593348eSBarry Smith       sum += (*v)*(*v); v++;
8012593348eSBarry Smith #endif
8022593348eSBarry Smith     }
8032593348eSBarry Smith     *norm = sqrt(sum);
8042593348eSBarry Smith   }
8052593348eSBarry Smith   else {
806b6490206SBarry Smith     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
8072593348eSBarry Smith   }
8082593348eSBarry Smith   return 0;
8092593348eSBarry Smith }
8102593348eSBarry Smith 
8112593348eSBarry Smith /*
8122593348eSBarry Smith      note: This can only work for identity for row and col. It would
8132593348eSBarry Smith    be good to check this and otherwise generate an error.
8142593348eSBarry Smith */
815b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
8162593348eSBarry Smith {
817b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
8182593348eSBarry Smith   Mat         outA;
819de6a44a3SBarry Smith   int         ierr;
8202593348eSBarry Smith 
821b6490206SBarry Smith   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
8222593348eSBarry Smith 
8232593348eSBarry Smith   outA          = inA;
8242593348eSBarry Smith   inA->factor   = FACTOR_LU;
8252593348eSBarry Smith   a->row        = row;
8262593348eSBarry Smith   a->col        = col;
8272593348eSBarry Smith 
828de6a44a3SBarry Smith   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
8292593348eSBarry Smith 
8302593348eSBarry Smith   if (!a->diag) {
831b6490206SBarry Smith     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
8322593348eSBarry Smith   }
8337fc0212eSBarry Smith   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
8342593348eSBarry Smith   return 0;
8352593348eSBarry Smith }
8362593348eSBarry Smith 
837b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
8382593348eSBarry Smith {
839b6490206SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
840b6490206SBarry Smith   int         one = 1, totalnz = a->bs*a->bs*a->nz;
841b6490206SBarry Smith   BLscal_( &totalnz, alpha, a->a, &one );
842b6490206SBarry Smith   PLogFlops(totalnz);
8432593348eSBarry Smith   return 0;
8442593348eSBarry Smith }
8452593348eSBarry Smith 
8467e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
8477e67e3f9SSatish Balay {
8487e67e3f9SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
8497e67e3f9SSatish Balay   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
8507e67e3f9SSatish Balay   int        *ai = a->i, *ailen = a->ilen, shift = a->indexshift;
8517e67e3f9SSatish Balay   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs;
8527e67e3f9SSatish Balay   Scalar     *ap, *aa = a->a, zero = 0.0;
8537e67e3f9SSatish Balay 
8547e67e3f9SSatish Balay   for ( k=0; k<m; k++ ) { /* loop over rows */
8557e67e3f9SSatish Balay     row  = im[k]; brow = row/bs;
8567e67e3f9SSatish Balay     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
8577e67e3f9SSatish Balay     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
8587e67e3f9SSatish Balay     rp   = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift;
8597e67e3f9SSatish Balay     nrow = ailen[brow];
8607e67e3f9SSatish Balay     for ( l=0; l<n; l++ ) { /* loop over columns */
8617e67e3f9SSatish Balay       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
8627e67e3f9SSatish Balay       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
8637e67e3f9SSatish Balay       col  = in[l] - shift;
8647e67e3f9SSatish Balay       bcol = col/bs;
8657e67e3f9SSatish Balay       cidx = col%bs;
8667e67e3f9SSatish Balay       ridx = row%bs;
8677e67e3f9SSatish Balay       high = nrow;
8687e67e3f9SSatish Balay       low  = 0; /* assume unsorted */
8697e67e3f9SSatish Balay       while (high-low > 5) {
8707e67e3f9SSatish Balay         t = (low+high)/2;
8717e67e3f9SSatish Balay         if (rp[t] > bcol) high = t;
8727e67e3f9SSatish Balay         else             low  = t;
8737e67e3f9SSatish Balay       }
8747e67e3f9SSatish Balay       for ( i=low; i<high; i++ ) {
8757e67e3f9SSatish Balay         if (rp[i] > bcol) break;
8767e67e3f9SSatish Balay         if (rp[i] == bcol) {
8777e67e3f9SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
8787e67e3f9SSatish Balay           goto finished;
8797e67e3f9SSatish Balay         }
8807e67e3f9SSatish Balay       }
8817e67e3f9SSatish Balay       *v++ = zero;
8827e67e3f9SSatish Balay       finished:;
8837e67e3f9SSatish Balay     }
8847e67e3f9SSatish Balay   }
8857e67e3f9SSatish Balay   return 0;
8867e67e3f9SSatish Balay }
8877e67e3f9SSatish Balay 
888b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A)
8892593348eSBarry Smith {
8902593348eSBarry Smith   static int called = 0;
8912593348eSBarry Smith 
8922593348eSBarry Smith   if (called) return 0; else called = 1;
8932593348eSBarry Smith   return 0;
8942593348eSBarry Smith }
8952593348eSBarry Smith /* -------------------------------------------------------------------*/
896cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
8979867e207SSatish Balay        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
898b6490206SBarry Smith        MatMult_SeqBAIJ,0,
899b6490206SBarry Smith        0,0,
900de6a44a3SBarry Smith        MatSolve_SeqBAIJ,0,
901b6490206SBarry Smith        0,0,
902de6a44a3SBarry Smith        MatLUFactor_SeqBAIJ,0,
903b6490206SBarry Smith        0,
904f2501298SSatish Balay        MatTranspose_SeqBAIJ,
90517e48fc4SSatish Balay        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
9069867e207SSatish Balay        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
907584200bdSSatish Balay        0,MatAssemblyEnd_SeqBAIJ,
908b6490206SBarry Smith        0,
909b6490206SBarry Smith        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0,
910b6490206SBarry Smith        MatGetReordering_SeqBAIJ,
9117fc0212eSBarry Smith        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
912b6490206SBarry Smith        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
913de6a44a3SBarry Smith        MatILUFactorSymbolic_SeqBAIJ,0,
914b6490206SBarry Smith        0,0,/*  MatConvert_SeqBAIJ  */ 0,
915b6490206SBarry Smith        0,0,
916b6490206SBarry Smith        MatConvertSameType_SeqBAIJ,0,0,
917b6490206SBarry Smith        MatILUFactor_SeqBAIJ,0,0,
918b6490206SBarry Smith        0,0,
9197e67e3f9SSatish Balay        MatGetValues_SeqBAIJ,0,
920b6490206SBarry Smith        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
921b6490206SBarry Smith        0};
9222593348eSBarry Smith 
9232593348eSBarry Smith /*@C
924b6490206SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in AIJ (compressed row) format
9252593348eSBarry Smith    (the default parallel PETSc format).  For good matrix assembly performance
9262593348eSBarry Smith    the user should preallocate the matrix storage by setting the parameter nz
9272593348eSBarry Smith    (or nzz).  By setting these parameters accurately, performance can be
9282593348eSBarry Smith    increased by more than a factor of 50.
9292593348eSBarry Smith 
9302593348eSBarry Smith    Input Parameters:
9312593348eSBarry Smith .  comm - MPI communicator, set to MPI_COMM_SELF
932b6490206SBarry Smith .  bs - size of block
9332593348eSBarry Smith .  m - number of rows
9342593348eSBarry Smith .  n - number of columns
935b6490206SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
936b6490206SBarry Smith .  nzz - number of block nonzeros per block row or PETSC_NULL
937b6490206SBarry Smith          (possibly different for each row)
9382593348eSBarry Smith 
9392593348eSBarry Smith    Output Parameter:
9402593348eSBarry Smith .  A - the matrix
9412593348eSBarry Smith 
9422593348eSBarry Smith    Notes:
943b6490206SBarry Smith    The BAIJ format, is fully compatible with standard Fortran 77
9442593348eSBarry Smith    storage.  That is, the stored row and column indices can begin at
9452593348eSBarry Smith    either one (as in Fortran) or zero.  See the users manual for details.
9462593348eSBarry Smith 
9472593348eSBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
9482593348eSBarry Smith    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
9492593348eSBarry Smith    allocation.  For additional details, see the users manual chapter on
9502593348eSBarry Smith    matrices and the file $(PETSC_DIR)/Performance.
9512593348eSBarry Smith 
9522593348eSBarry Smith .seealso: MatCreate(), MatCreateMPIAIJ(), MatSetValues()
9532593348eSBarry Smith @*/
954b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
9552593348eSBarry Smith {
9562593348eSBarry Smith   Mat         B;
957b6490206SBarry Smith   Mat_SeqBAIJ *b;
958f2501298SSatish Balay   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs;
959b6490206SBarry Smith 
960f2501298SSatish Balay   if (mbs*bs!=m || nbs*bs!=n)
961f2501298SSatish Balay     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
9622593348eSBarry Smith 
9632593348eSBarry Smith   *A = 0;
964b6490206SBarry Smith   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
9652593348eSBarry Smith   PLogObjectCreate(B);
966b6490206SBarry Smith   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
9672593348eSBarry Smith   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
9687fc0212eSBarry Smith   switch (bs) {
9697fc0212eSBarry Smith     case 1:
9707fc0212eSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
9717fc0212eSBarry Smith       break;
9724eeb42bcSBarry Smith     case 2:
9734eeb42bcSBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
9746d84be18SBarry Smith       break;
975f327dd97SBarry Smith     case 3:
976f327dd97SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
9774eeb42bcSBarry Smith       break;
9786d84be18SBarry Smith     case 4:
9796d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
9806d84be18SBarry Smith       break;
9816d84be18SBarry Smith     case 5:
9826d84be18SBarry Smith       B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
9836d84be18SBarry Smith       break;
9847fc0212eSBarry Smith   }
985b6490206SBarry Smith   B->destroy          = MatDestroy_SeqBAIJ;
986b6490206SBarry Smith   B->view             = MatView_SeqBAIJ;
9872593348eSBarry Smith   B->factor           = 0;
9882593348eSBarry Smith   B->lupivotthreshold = 1.0;
9892593348eSBarry Smith   b->row              = 0;
9902593348eSBarry Smith   b->col              = 0;
9912593348eSBarry Smith   b->reallocs         = 0;
9922593348eSBarry Smith 
9932593348eSBarry Smith   b->m       = m;
994b6490206SBarry Smith   b->mbs     = mbs;
9952593348eSBarry Smith   b->n       = n;
996f2501298SSatish Balay   b->nbs     = nbs;
997b6490206SBarry Smith   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
9982593348eSBarry Smith   if (nnz == PETSC_NULL) {
9997e67e3f9SSatish Balay     if (nz == PETSC_DEFAULT) nz = CHUNKSIZE;
10002593348eSBarry Smith     else if (nz <= 0)        nz = 1;
1001b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1002b6490206SBarry Smith     nz = nz*mbs;
10032593348eSBarry Smith   }
10042593348eSBarry Smith   else {
10052593348eSBarry Smith     nz = 0;
1006b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
10072593348eSBarry Smith   }
10082593348eSBarry Smith 
10092593348eSBarry Smith   /* allocate the matrix space */
10107e67e3f9SSatish Balay   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
10112593348eSBarry Smith   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
10127e67e3f9SSatish Balay   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
10137e67e3f9SSatish Balay   b->j  = (int *) (b->a + nz*bs2);
10142593348eSBarry Smith   PetscMemzero(b->j,nz*sizeof(int));
10152593348eSBarry Smith   b->i  = b->j + nz;
10162593348eSBarry Smith   b->singlemalloc = 1;
10172593348eSBarry Smith 
1018de6a44a3SBarry Smith   b->i[0] = 0;
1019b6490206SBarry Smith   for (i=1; i<mbs+1; i++) {
10202593348eSBarry Smith     b->i[i] = b->i[i-1] + b->imax[i-1];
10212593348eSBarry Smith   }
10222593348eSBarry Smith 
1023b6490206SBarry Smith   /* b->ilen will count nonzeros in each block row so far. */
1024b6490206SBarry Smith   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1025b6490206SBarry Smith   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1026b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
10272593348eSBarry Smith 
1028b6490206SBarry Smith   b->bs               = bs;
1029b6490206SBarry Smith   b->mbs              = mbs;
10302593348eSBarry Smith   b->nz               = 0;
10312593348eSBarry Smith   b->maxnz            = nz;
10322593348eSBarry Smith   b->sorted           = 0;
10332593348eSBarry Smith   b->roworiented      = 1;
10342593348eSBarry Smith   b->nonew            = 0;
10352593348eSBarry Smith   b->diag             = 0;
10362593348eSBarry Smith   b->solve_work       = 0;
1037de6a44a3SBarry Smith   b->mult_work        = 0;
10382593348eSBarry Smith   b->spptr            = 0;
1039*1073c447SSatish Balay   b->indexshift       = 0;
10402593348eSBarry Smith   *A = B;
10412593348eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
10422593348eSBarry Smith   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
10432593348eSBarry Smith   return 0;
10442593348eSBarry Smith }
10452593348eSBarry Smith 
1046b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
10472593348eSBarry Smith {
10482593348eSBarry Smith   Mat         C;
1049b6490206SBarry Smith   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
10507e67e3f9SSatish Balay   int         i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs;
1051de6a44a3SBarry Smith 
1052de6a44a3SBarry Smith   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
10532593348eSBarry Smith 
10542593348eSBarry Smith   *B = 0;
1055b6490206SBarry Smith   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
10562593348eSBarry Smith   PLogObjectCreate(C);
1057b6490206SBarry Smith   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
10582593348eSBarry Smith   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1059b6490206SBarry Smith   C->destroy    = MatDestroy_SeqBAIJ;
1060b6490206SBarry Smith   C->view       = MatView_SeqBAIJ;
10612593348eSBarry Smith   C->factor     = A->factor;
10622593348eSBarry Smith   c->row        = 0;
10632593348eSBarry Smith   c->col        = 0;
10642593348eSBarry Smith   C->assembled  = PETSC_TRUE;
10652593348eSBarry Smith 
10662593348eSBarry Smith   c->m          = a->m;
10672593348eSBarry Smith   c->n          = a->n;
1068b6490206SBarry Smith   c->bs         = a->bs;
1069b6490206SBarry Smith   c->mbs        = a->mbs;
1070b6490206SBarry Smith   c->nbs        = a->nbs;
1071*1073c447SSatish Balay   c->indexshift = 0;
10722593348eSBarry Smith 
1073b6490206SBarry Smith   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1074b6490206SBarry Smith   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1075b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
10762593348eSBarry Smith     c->imax[i] = a->imax[i];
10772593348eSBarry Smith     c->ilen[i] = a->ilen[i];
10782593348eSBarry Smith   }
10792593348eSBarry Smith 
10802593348eSBarry Smith   /* allocate the matrix space */
10812593348eSBarry Smith   c->singlemalloc = 1;
10827e67e3f9SSatish Balay   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
10832593348eSBarry Smith   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
10847e67e3f9SSatish Balay   c->j  = (int *) (c->a + nz*bs2);
1085de6a44a3SBarry Smith   c->i  = c->j + nz;
1086b6490206SBarry Smith   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1087b6490206SBarry Smith   if (mbs > 0) {
1088de6a44a3SBarry Smith     PetscMemcpy(c->j,a->j,nz*sizeof(int));
10892593348eSBarry Smith     if (cpvalues == COPY_VALUES) {
10907e67e3f9SSatish Balay       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
10912593348eSBarry Smith     }
10922593348eSBarry Smith   }
10932593348eSBarry Smith 
1094b6490206SBarry Smith   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
10952593348eSBarry Smith   c->sorted      = a->sorted;
10962593348eSBarry Smith   c->roworiented = a->roworiented;
10972593348eSBarry Smith   c->nonew       = a->nonew;
10982593348eSBarry Smith 
10992593348eSBarry Smith   if (a->diag) {
1100b6490206SBarry Smith     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1101b6490206SBarry Smith     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1102b6490206SBarry Smith     for ( i=0; i<mbs; i++ ) {
11032593348eSBarry Smith       c->diag[i] = a->diag[i];
11042593348eSBarry Smith     }
11052593348eSBarry Smith   }
11062593348eSBarry Smith   else c->diag          = 0;
11072593348eSBarry Smith   c->nz                 = a->nz;
11082593348eSBarry Smith   c->maxnz              = a->maxnz;
11092593348eSBarry Smith   c->solve_work         = 0;
11102593348eSBarry Smith   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
11117fc0212eSBarry Smith   c->mult_work          = 0;
11122593348eSBarry Smith   *B = C;
11132593348eSBarry Smith   return 0;
11142593348eSBarry Smith }
11152593348eSBarry Smith 
111619bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
11172593348eSBarry Smith {
1118b6490206SBarry Smith   Mat_SeqBAIJ  *a;
11192593348eSBarry Smith   Mat          B;
1120de6a44a3SBarry Smith   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1121b6490206SBarry Smith   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
112235aab85fSBarry Smith   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1123de6a44a3SBarry Smith   int          *masked, nmask,tmp,ishift,bs2;
1124b6490206SBarry Smith   Scalar       *aa;
112519bcc07fSBarry Smith   MPI_Comm     comm = ((PetscObject) viewer)->comm;
11262593348eSBarry Smith 
112735aab85fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1128de6a44a3SBarry Smith   bs2  = bs*bs;
1129b6490206SBarry Smith 
11302593348eSBarry Smith   MPI_Comm_size(comm,&size);
1131b6490206SBarry Smith   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
113290ace30eSBarry Smith   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
113377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1134de6a44a3SBarry Smith   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
11352593348eSBarry Smith   M = header[1]; N = header[2]; nz = header[3];
11362593348eSBarry Smith 
1137b6490206SBarry Smith   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
113835aab85fSBarry Smith 
113935aab85fSBarry Smith   /*
114035aab85fSBarry Smith      This code adds extra rows to make sure the number of rows is
114135aab85fSBarry Smith     divisible by the blocksize
114235aab85fSBarry Smith   */
1143b6490206SBarry Smith   mbs        = M/bs;
114435aab85fSBarry Smith   extra_rows = bs - M + bs*(mbs);
114535aab85fSBarry Smith   if (extra_rows == bs) extra_rows = 0;
114635aab85fSBarry Smith   else                  mbs++;
114735aab85fSBarry Smith   if (extra_rows) {
114835aab85fSBarry Smith     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize");
114935aab85fSBarry Smith   }
1150b6490206SBarry Smith 
11512593348eSBarry Smith   /* read in row lengths */
115235aab85fSBarry Smith   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
115377c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
115435aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
11552593348eSBarry Smith 
1156b6490206SBarry Smith   /* read in column indices */
115735aab85fSBarry Smith   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
115877c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
115935aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1160b6490206SBarry Smith 
1161b6490206SBarry Smith   /* loop over row lengths determining block row lengths */
1162b6490206SBarry Smith   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1163b6490206SBarry Smith   PetscMemzero(browlengths,mbs*sizeof(int));
116435aab85fSBarry Smith   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
116535aab85fSBarry Smith   PetscMemzero(mask,mbs*sizeof(int));
116635aab85fSBarry Smith   masked = mask + mbs;
1167b6490206SBarry Smith   rowcount = 0; nzcount = 0;
1168b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
116935aab85fSBarry Smith     nmask = 0;
1170b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1171b6490206SBarry Smith       kmax = rowlengths[rowcount];
1172b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
117335aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
117435aab85fSBarry Smith         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1175b6490206SBarry Smith       }
1176b6490206SBarry Smith       rowcount++;
1177b6490206SBarry Smith     }
117835aab85fSBarry Smith     browlengths[i] += nmask;
117935aab85fSBarry Smith     /* zero out the mask elements we set */
118035aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1181b6490206SBarry Smith   }
1182b6490206SBarry Smith 
11832593348eSBarry Smith   /* create our matrix */
118435aab85fSBarry Smith   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
118535aab85fSBarry Smith          CHKERRQ(ierr);
11862593348eSBarry Smith   B = *A;
1187b6490206SBarry Smith   a = (Mat_SeqBAIJ *) B->data;
11882593348eSBarry Smith 
11892593348eSBarry Smith   /* set matrix "i" values */
1190de6a44a3SBarry Smith   a->i[0] = 0;
1191b6490206SBarry Smith   for ( i=1; i<= mbs; i++ ) {
1192b6490206SBarry Smith     a->i[i]      = a->i[i-1] + browlengths[i-1];
1193b6490206SBarry Smith     a->ilen[i-1] = browlengths[i-1];
11942593348eSBarry Smith   }
11959867e207SSatish Balay   a->indexshift = 0;
1196b6490206SBarry Smith   a->nz         = 0;
1197b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
11982593348eSBarry Smith 
1199b6490206SBarry Smith   /* read in nonzero values */
120035aab85fSBarry Smith   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
120177c4ece6SBarry Smith   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
120235aab85fSBarry Smith   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1203b6490206SBarry Smith 
1204b6490206SBarry Smith   /* set "a" and "j" values into matrix */
1205b6490206SBarry Smith   nzcount = 0; jcount = 0;
1206b6490206SBarry Smith   for ( i=0; i<mbs; i++ ) {
1207b6490206SBarry Smith     nzcountb = nzcount;
120835aab85fSBarry Smith     nmask    = 0;
1209b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1210b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1211b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
121235aab85fSBarry Smith         tmp = jj[nzcount++]/bs;
121335aab85fSBarry Smith 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1214b6490206SBarry Smith       }
1215b6490206SBarry Smith       rowcount++;
1216b6490206SBarry Smith     }
1217de6a44a3SBarry Smith     /* sort the masked values */
121877c4ece6SBarry Smith     PetscSortInt(nmask,masked);
1219de6a44a3SBarry Smith 
1220b6490206SBarry Smith     /* set "j" values into matrix */
1221b6490206SBarry Smith     maskcount = 1;
122235aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) {
122335aab85fSBarry Smith       a->j[jcount++]  = masked[j];
1224de6a44a3SBarry Smith       mask[masked[j]] = maskcount++;
1225b6490206SBarry Smith     }
1226b6490206SBarry Smith     /* set "a" values into matrix */
1227de6a44a3SBarry Smith     ishift = bs2*a->i[i];
1228b6490206SBarry Smith     for ( j=0; j<bs; j++ ) {
1229b6490206SBarry Smith       kmax = rowlengths[i*bs+j];
1230b6490206SBarry Smith       for ( k=0; k<kmax; k++ ) {
1231de6a44a3SBarry Smith         tmp    = jj[nzcountb]/bs ;
1232de6a44a3SBarry Smith         block  = mask[tmp] - 1;
1233de6a44a3SBarry Smith         point  = jj[nzcountb] - bs*tmp;
1234de6a44a3SBarry Smith         idx    = ishift + bs2*block + j + bs*point;
1235b6490206SBarry Smith         a->a[idx] = aa[nzcountb++];
1236b6490206SBarry Smith       }
1237b6490206SBarry Smith     }
123835aab85fSBarry Smith     /* zero out the mask elements we set */
123935aab85fSBarry Smith     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1240b6490206SBarry Smith   }
124135aab85fSBarry Smith   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
1242b6490206SBarry Smith 
1243b6490206SBarry Smith   PetscFree(rowlengths);
1244b6490206SBarry Smith   PetscFree(browlengths);
1245b6490206SBarry Smith   PetscFree(aa);
1246b6490206SBarry Smith   PetscFree(jj);
1247b6490206SBarry Smith   PetscFree(mask);
1248b6490206SBarry Smith 
1249b6490206SBarry Smith   B->assembled = PETSC_TRUE;
1250de6a44a3SBarry Smith 
1251de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
1252de6a44a3SBarry Smith   if (flg) {
125319bcc07fSBarry Smith     Viewer tviewer;
125419bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
125590ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr);
125619bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
125719bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1258de6a44a3SBarry Smith   }
1259de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
1260de6a44a3SBarry Smith   if (flg) {
126119bcc07fSBarry Smith     Viewer tviewer;
126219bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
126390ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr);
126419bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
126519bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1266de6a44a3SBarry Smith   }
1267de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
1268de6a44a3SBarry Smith   if (flg) {
126919bcc07fSBarry Smith     Viewer tviewer;
127019bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
127119bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
127219bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1273de6a44a3SBarry Smith   }
1274de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
1275de6a44a3SBarry Smith   if (flg) {
127619bcc07fSBarry Smith     Viewer tviewer;
127719bcc07fSBarry Smith     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
127890ace30eSBarry Smith     ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr);
127919bcc07fSBarry Smith     ierr = MatView(B,tviewer); CHKERRQ(ierr);
128019bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1281de6a44a3SBarry Smith   }
1282de6a44a3SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
1283de6a44a3SBarry Smith   if (flg) {
128419bcc07fSBarry Smith     Viewer tviewer;
128519bcc07fSBarry Smith     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
128619bcc07fSBarry Smith     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
128719bcc07fSBarry Smith     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
128819bcc07fSBarry Smith     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
1289de6a44a3SBarry Smith   }
12902593348eSBarry Smith   return 0;
12912593348eSBarry Smith }
12922593348eSBarry Smith 
12932593348eSBarry Smith 
12942593348eSBarry Smith 
1295