1b6490206SBarry Smith 22593348eSBarry Smith #ifndef lint 3*faf6580fSSatish Balay static char vcid[] = "$Id: baij.c,v 1.50 1996/06/24 20:57:39 balay Exp balay $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 10b6490206SBarry Smith #include "baij.h" 111a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 121a6a6d98SBarry Smith #include "src/inline/spops.h" 132593348eSBarry Smith #include "petsc.h" 143b547af2SSatish Balay 15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 162593348eSBarry Smith 17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 182593348eSBarry Smith { 19b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20bcd2baecSBarry Smith int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 212593348eSBarry Smith 222593348eSBarry Smith /* 232593348eSBarry Smith this is tacky: In the future when we have written special factorization 242593348eSBarry Smith and solve routines for the identity permutation we should use a 252593348eSBarry Smith stride index set instead of the general one. 262593348eSBarry Smith */ 272593348eSBarry Smith if (type == ORDER_NATURAL) { 282593348eSBarry Smith idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 292593348eSBarry Smith for ( i=0; i<n; i++ ) idx[i] = i; 302593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 312593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 322593348eSBarry Smith PetscFree(idx); 332593348eSBarry Smith ISSetPermutation(*rperm); 342593348eSBarry Smith ISSetPermutation(*cperm); 352593348eSBarry Smith ISSetIdentity(*rperm); 362593348eSBarry Smith ISSetIdentity(*cperm); 372593348eSBarry Smith return 0; 382593348eSBarry Smith } 392593348eSBarry Smith 40bcd2baecSBarry Smith MatReorderingRegisterAll(); 41a7c10996SSatish Balay ishift = 0; 42bcd2baecSBarry Smith oshift = -MatReorderingIndexShift[(int)type]; 43bcd2baecSBarry Smith if (MatReorderingRequiresSymmetric[(int)type]) { 441a6a6d98SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 451a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 462593348eSBarry Smith PetscFree(ia); PetscFree(ja); 47bcd2baecSBarry Smith } else { 48bcd2baecSBarry Smith if (ishift == oshift) { 491a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50bcd2baecSBarry Smith } 51bcd2baecSBarry Smith else if (ishift == -1) { 52bcd2baecSBarry Smith /* temporarily subtract 1 from i and j indices */ 531a6a6d98SBarry Smith int nz = a->i[n] - 1; 54bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 551a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 561a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 581a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 59bcd2baecSBarry Smith } else { 60bcd2baecSBarry Smith /* temporarily add 1 to i and j indices */ 611a6a6d98SBarry Smith int nz = a->i[n] - 1; 62bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 631a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 641a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 661a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 67bcd2baecSBarry Smith } 68bcd2baecSBarry Smith } 692593348eSBarry Smith return 0; 702593348eSBarry Smith } 712593348eSBarry Smith 72de6a44a3SBarry Smith /* 73de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 74de6a44a3SBarry Smith */ 75de6a44a3SBarry Smith 76de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 77de6a44a3SBarry Smith { 78de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 797fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 80de6a44a3SBarry Smith 81de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 837fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 84de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85de6a44a3SBarry Smith if (a->j[j] == i) { 86de6a44a3SBarry Smith diag[i] = j; 87de6a44a3SBarry Smith break; 88de6a44a3SBarry Smith } 89de6a44a3SBarry Smith } 90de6a44a3SBarry Smith } 91de6a44a3SBarry Smith a->diag = diag; 92de6a44a3SBarry Smith return 0; 93de6a44a3SBarry Smith } 942593348eSBarry Smith 952593348eSBarry Smith #include "draw.h" 962593348eSBarry Smith #include "pinclude/pviewer.h" 9777c4ece6SBarry Smith #include "sys.h" 982593348eSBarry Smith 99b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 1002593348eSBarry Smith { 101b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1029df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103b6490206SBarry Smith Scalar *aa; 1042593348eSBarry Smith 10590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1062593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1072593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1082593348eSBarry Smith col_lens[1] = a->m; 1092593348eSBarry Smith col_lens[2] = a->n; 1107e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1112593348eSBarry Smith 1122593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 113b6490206SBarry Smith count = 0; 114b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 115b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 116b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117b6490206SBarry Smith } 1182593348eSBarry Smith } 11977c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1202593348eSBarry Smith PetscFree(col_lens); 1212593348eSBarry Smith 1222593348eSBarry Smith /* store column indices (zero start index) */ 1237e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124b6490206SBarry Smith count = 0; 125b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 126b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 127b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 129b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1302593348eSBarry Smith } 1312593348eSBarry Smith } 132b6490206SBarry Smith } 133b6490206SBarry Smith } 1347e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135b6490206SBarry Smith PetscFree(jj); 1362593348eSBarry Smith 1372593348eSBarry Smith /* store nonzero values */ 1387e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139b6490206SBarry Smith count = 0; 140b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 141b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 142b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1447e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 145b6490206SBarry Smith } 146b6490206SBarry Smith } 147b6490206SBarry Smith } 148b6490206SBarry Smith } 1497e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150b6490206SBarry Smith PetscFree(aa); 1512593348eSBarry Smith return 0; 1522593348eSBarry Smith } 1532593348eSBarry Smith 154b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1552593348eSBarry Smith { 156b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1579df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1582593348eSBarry Smith FILE *fd; 1592593348eSBarry Smith char *outputname; 1602593348eSBarry Smith 16190ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1622593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16390ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 1647ddc982cSLois Curfman McInnes if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 1657ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1662593348eSBarry Smith } 16790ace30eSBarry Smith else if (format == ASCII_FORMAT_MATLAB) { 168b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1692593348eSBarry Smith } 17044cd7ae7SLois Curfman McInnes else if (format == ASCII_FORMAT_COMMON) { 17144cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17244cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17344cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17444cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17544cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17644cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17744cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17844cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17944cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 18044cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18144cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18244cd7ae7SLois Curfman McInnes #else 18344cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18444cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18544cd7ae7SLois Curfman McInnes #endif 18644cd7ae7SLois Curfman McInnes } 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 18944cd7ae7SLois Curfman McInnes } 19044cd7ae7SLois Curfman McInnes } 19144cd7ae7SLois Curfman McInnes } 1922593348eSBarry Smith else { 193b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 194b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 195b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 196b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19888685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1997e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 20088685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2017e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20288685aaeSLois Curfman McInnes } 20388685aaeSLois Curfman McInnes else { 2047e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20588685aaeSLois Curfman McInnes } 20688685aaeSLois Curfman McInnes #else 2077e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20888685aaeSLois Curfman McInnes #endif 2092593348eSBarry Smith } 2102593348eSBarry Smith } 2112593348eSBarry Smith fprintf(fd,"\n"); 2122593348eSBarry Smith } 2132593348eSBarry Smith } 214b6490206SBarry Smith } 2152593348eSBarry Smith fflush(fd); 2162593348eSBarry Smith return 0; 2172593348eSBarry Smith } 2182593348eSBarry Smith 219b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 2202593348eSBarry Smith { 2212593348eSBarry Smith Mat A = (Mat) obj; 22219bcc07fSBarry Smith ViewerType vtype; 22319bcc07fSBarry Smith int ierr; 2242593348eSBarry Smith 2252593348eSBarry Smith if (!viewer) { 22619bcc07fSBarry Smith viewer = STDOUT_VIEWER_SELF; 2272593348eSBarry Smith } 22819bcc07fSBarry Smith 22919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 23019bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 231b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 2322593348eSBarry Smith } 23319bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 2352593348eSBarry Smith } 23619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 237b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 2382593348eSBarry Smith } 23919bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 240b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 2412593348eSBarry Smith } 2422593348eSBarry Smith return 0; 2432593348eSBarry Smith } 244b6490206SBarry Smith 245119a934aSSatish Balay #define CHUNKSIZE 10 246cd0e1443SSatish Balay 247cd0e1443SSatish Balay /* This version has row oriented v */ 248cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249cd0e1443SSatish Balay { 250cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 2549df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 255cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 256cd0e1443SSatish Balay 257cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 258cd0e1443SSatish Balay row = im[k]; brow = row/bs; 259cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 263cd0e1443SSatish Balay low = 0; 264cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 265cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267a7c10996SSatish Balay col = in[l]; bcol = col/bs; 268cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 269cd0e1443SSatish Balay if (roworiented) { 270cd0e1443SSatish Balay value = *v++; 271cd0e1443SSatish Balay } 272cd0e1443SSatish Balay else { 273cd0e1443SSatish Balay value = v[k + l*m]; 274cd0e1443SSatish Balay } 275cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 276cd0e1443SSatish Balay while (high-low > 5) { 277cd0e1443SSatish Balay t = (low+high)/2; 278cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 279cd0e1443SSatish Balay else low = t; 280cd0e1443SSatish Balay } 281cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 282cd0e1443SSatish Balay if (rp[i] > bcol) break; 283cd0e1443SSatish Balay if (rp[i] == bcol) { 2847e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 285cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 286cd0e1443SSatish Balay else *bap = value; 287cd0e1443SSatish Balay goto noinsert; 288cd0e1443SSatish Balay } 289cd0e1443SSatish Balay } 290cd0e1443SSatish Balay if (nonew) goto noinsert; 291cd0e1443SSatish Balay if (nrow >= rmax) { 292cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 293cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294cd0e1443SSatish Balay Scalar *new_a; 295cd0e1443SSatish Balay 296cd0e1443SSatish Balay /* malloc new storage space */ 2977e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 2997e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 300cd0e1443SSatish Balay new_i = new_j + new_nz; 301cd0e1443SSatish Balay 302cd0e1443SSatish Balay /* copy over old data into new slots */ 303cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 3047e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308cd0e1443SSatish Balay len*sizeof(int)); 309a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313cd0e1443SSatish Balay /* free up old matrix storage */ 314cd0e1443SSatish Balay PetscFree(a->a); 315cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317cd0e1443SSatish Balay a->singlemalloc = 1; 318cd0e1443SSatish Balay 319a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 3217e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 32218e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 323cd0e1443SSatish Balay a->reallocs++; 324119a934aSSatish Balay a->nz++; 325cd0e1443SSatish Balay } 3267e67e3f9SSatish Balay N = nrow++ - 1; 327cd0e1443SSatish Balay /* shift up all the later entries in this row */ 328cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 329cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 3307e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331cd0e1443SSatish Balay } 3327e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333cd0e1443SSatish Balay rp[i] = bcol; 3347e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 335cd0e1443SSatish Balay noinsert:; 336cd0e1443SSatish Balay low = i; 337cd0e1443SSatish Balay } 338cd0e1443SSatish Balay ailen[brow] = nrow; 339cd0e1443SSatish Balay } 340cd0e1443SSatish Balay return 0; 341cd0e1443SSatish Balay } 342cd0e1443SSatish Balay 3430b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 3440b824a48SSatish Balay { 3450b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3460b824a48SSatish Balay *m = a->m; *n = a->n; 3470b824a48SSatish Balay return 0; 3480b824a48SSatish Balay } 3490b824a48SSatish Balay 3500b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 3510b824a48SSatish Balay { 3520b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3530b824a48SSatish Balay *m = 0; *n = a->m; 3540b824a48SSatish Balay return 0; 3550b824a48SSatish Balay } 3560b824a48SSatish Balay 3579867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 3589867e207SSatish Balay { 3599867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3607e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 3619867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 3629867e207SSatish Balay 3639867e207SSatish Balay bs = a->bs; 3649867e207SSatish Balay ai = a->i; 3659867e207SSatish Balay aj = a->j; 3669867e207SSatish Balay aa = a->a; 3679df24120SSatish Balay bs2 = a->bs2; 3689867e207SSatish Balay 3699867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 3709867e207SSatish Balay 3719867e207SSatish Balay bn = row/bs; /* Block number */ 3729867e207SSatish Balay bp = row % bs; /* Block Position */ 3739867e207SSatish Balay M = ai[bn+1] - ai[bn]; 3749867e207SSatish Balay *nz = bs*M; 3759867e207SSatish Balay 3769867e207SSatish Balay if (v) { 3779867e207SSatish Balay *v = 0; 3789867e207SSatish Balay if (*nz) { 3799867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 3809867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3819867e207SSatish Balay v_i = *v + i*bs; 3827e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 3837e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 3849867e207SSatish Balay } 3859867e207SSatish Balay } 3869867e207SSatish Balay } 3879867e207SSatish Balay 3889867e207SSatish Balay if (idx) { 3899867e207SSatish Balay *idx = 0; 3909867e207SSatish Balay if (*nz) { 3919867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 3929867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3939867e207SSatish Balay idx_i = *idx + i*bs; 3949867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 3959867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 3969867e207SSatish Balay } 3979867e207SSatish Balay } 3989867e207SSatish Balay } 3999867e207SSatish Balay return 0; 4009867e207SSatish Balay } 4019867e207SSatish Balay 4029867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4039867e207SSatish Balay { 4049867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 4059867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 4069867e207SSatish Balay return 0; 4079867e207SSatish Balay } 408b6490206SBarry Smith 409f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410f2501298SSatish Balay { 411f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412f2501298SSatish Balay Mat C; 413f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 4149df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 415f2501298SSatish Balay Scalar *array=a->a; 416f2501298SSatish Balay 417f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 418f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 421a7c10996SSatish Balay 422a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424f2501298SSatish Balay PetscFree(col); 425f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426f2501298SSatish Balay cols = rows + bs; 427f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 428f2501298SSatish Balay cols[0] = i*bs; 429f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430f2501298SSatish Balay len = ai[i+1] - ai[i]; 431f2501298SSatish Balay for ( j=0; j<len; j++ ) { 432f2501298SSatish Balay rows[0] = (*aj++)*bs; 433f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435f2501298SSatish Balay array += bs2; 436f2501298SSatish Balay } 437f2501298SSatish Balay } 4381073c447SSatish Balay PetscFree(rows); 439f2501298SSatish Balay 440f2501298SSatish Balay ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441f2501298SSatish Balay ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442f2501298SSatish Balay 443f2501298SSatish Balay if (B != PETSC_NULL) { 444f2501298SSatish Balay *B = C; 445f2501298SSatish Balay } else { 446f2501298SSatish Balay /* This isn't really an in-place transpose */ 447f2501298SSatish Balay PetscFree(a->a); 448f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 450f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 451f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 452f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 453f2501298SSatish Balay PetscFree(a); 454f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 455f2501298SSatish Balay PetscHeaderDestroy(C); 456f2501298SSatish Balay } 457f2501298SSatish Balay return 0; 458f2501298SSatish Balay } 459f2501298SSatish Balay 460f2501298SSatish Balay 461584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462584200bdSSatish Balay { 463584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 4669df24120SSatish Balay int mbs = a->mbs, bs2 = a->bs2; 467584200bdSSatish Balay Scalar *aa = a->a, *ap; 468584200bdSSatish Balay 469584200bdSSatish Balay if (mode == FLUSH_ASSEMBLY) return 0; 470584200bdSSatish Balay 471584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 472584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 473584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 474584200bdSSatish Balay if (fshift) { 475a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476584200bdSSatish Balay N = ailen[i]; 477584200bdSSatish Balay for ( j=0; j<N; j++ ) { 478584200bdSSatish Balay ip[j-fshift] = ip[j]; 4797e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480584200bdSSatish Balay } 481584200bdSSatish Balay } 482584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 483584200bdSSatish Balay } 484584200bdSSatish Balay if (mbs) { 485584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 486584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487584200bdSSatish Balay } 488584200bdSSatish Balay /* reset ilen and imax for each row */ 489584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 490584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 491584200bdSSatish Balay } 492a7c10996SSatish Balay a->nz = ai[mbs]; 493584200bdSSatish Balay 494584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 495584200bdSSatish Balay if (fshift && a->diag) { 496584200bdSSatish Balay PetscFree(a->diag); 497584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 498584200bdSSatish Balay a->diag = 0; 499584200bdSSatish Balay } 50067790700SSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502584200bdSSatish Balay a->reallocs); 503584200bdSSatish Balay return 0; 504584200bdSSatish Balay } 505584200bdSSatish Balay 506b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 5072593348eSBarry Smith { 508b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5099df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 5102593348eSBarry Smith return 0; 5112593348eSBarry Smith } 5122593348eSBarry Smith 513b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 5142593348eSBarry Smith { 5152593348eSBarry Smith Mat A = (Mat) obj; 516b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5172593348eSBarry Smith 5182593348eSBarry Smith #if defined(PETSC_LOG) 5192593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 5202593348eSBarry Smith #endif 5212593348eSBarry Smith PetscFree(a->a); 5222593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 5232593348eSBarry Smith if (a->diag) PetscFree(a->diag); 5242593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 5252593348eSBarry Smith if (a->imax) PetscFree(a->imax); 5262593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 527de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 5282593348eSBarry Smith PetscFree(a); 529f2655603SLois Curfman McInnes PLogObjectDestroy(A); 530f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 5312593348eSBarry Smith return 0; 5322593348eSBarry Smith } 5332593348eSBarry Smith 534b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 5352593348eSBarry Smith { 536b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5372593348eSBarry Smith if (op == ROW_ORIENTED) a->roworiented = 1; 5382593348eSBarry Smith else if (op == COLUMN_ORIENTED) a->roworiented = 0; 5392593348eSBarry Smith else if (op == COLUMNS_SORTED) a->sorted = 1; 5402593348eSBarry Smith else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 5412593348eSBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 5422593348eSBarry Smith else if (op == ROWS_SORTED || 5432593348eSBarry Smith op == SYMMETRIC_MATRIX || 5442593348eSBarry Smith op == STRUCTURALLY_SYMMETRIC_MATRIX || 5452593348eSBarry Smith op == YES_NEW_DIAGONALS) 54694a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 5472593348eSBarry Smith else if (op == NO_NEW_DIAGONALS) 548b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 5492593348eSBarry Smith else 550b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 5512593348eSBarry Smith return 0; 5522593348eSBarry Smith } 5532593348eSBarry Smith 5542593348eSBarry Smith 5552593348eSBarry Smith /* -------------------------------------------------------*/ 5562593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 5572593348eSBarry Smith /* -------------------------------------------------------*/ 558b6490206SBarry Smith #include "pinclude/plapack.h" 559b6490206SBarry Smith 56039b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 5612593348eSBarry Smith { 562b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5631a6a6d98SBarry Smith Scalar *xg,*zg; 56439b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 56539b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii,n,ierr; 5662593348eSBarry Smith 567bb42667fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 568bb42667fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 569b6490206SBarry Smith 570119a934aSSatish Balay idx = a->j; 571119a934aSSatish Balay v = a->a; 572119a934aSSatish Balay ii = a->i; 573119a934aSSatish Balay 574119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 575119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 576119a934aSSatish Balay sum = 0.0; 577119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 5781a6a6d98SBarry Smith z[i] = sum; 579119a934aSSatish Balay } 58039b95ed1SBarry Smith ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 58139b95ed1SBarry Smith ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 58239b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 58339b95ed1SBarry Smith return 0; 58439b95ed1SBarry Smith } 58539b95ed1SBarry Smith 58639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 58739b95ed1SBarry Smith { 58839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 58939b95ed1SBarry Smith Scalar *xg,*zg; 59039b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 59139b95ed1SBarry Smith register Scalar x1,x2; 59239b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 59339b95ed1SBarry Smith int j,n,ierr; 59439b95ed1SBarry Smith 59539b95ed1SBarry Smith ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 59639b95ed1SBarry Smith ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 59739b95ed1SBarry Smith 59839b95ed1SBarry Smith idx = a->j; 59939b95ed1SBarry Smith v = a->a; 60039b95ed1SBarry Smith ii = a->i; 60139b95ed1SBarry Smith 602119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 603119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 604119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 605119a934aSSatish Balay for ( j=0; j<n; j++ ) { 606119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 607119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 608119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 609119a934aSSatish Balay v += 4; 610119a934aSSatish Balay } 6111a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 612119a934aSSatish Balay z += 2; 613119a934aSSatish Balay } 61439b95ed1SBarry Smith ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 61539b95ed1SBarry Smith ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 61639b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 61739b95ed1SBarry Smith return 0; 61839b95ed1SBarry Smith } 61939b95ed1SBarry Smith 62039b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 62139b95ed1SBarry Smith { 62239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 62339b95ed1SBarry Smith Scalar *xg,*zg; 62439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 62539b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 62639b95ed1SBarry Smith 62739b95ed1SBarry Smith ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 62839b95ed1SBarry Smith ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 62939b95ed1SBarry Smith 63039b95ed1SBarry Smith idx = a->j; 63139b95ed1SBarry Smith v = a->a; 63239b95ed1SBarry Smith ii = a->i; 63339b95ed1SBarry Smith 634119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 635119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 636119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 637119a934aSSatish Balay for ( j=0; j<n; j++ ) { 638119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 639119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 640119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 641119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 642119a934aSSatish Balay v += 9; 643119a934aSSatish Balay } 6441a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 645119a934aSSatish Balay z += 3; 646119a934aSSatish Balay } 64739b95ed1SBarry Smith ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 64839b95ed1SBarry Smith ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 64939b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 65039b95ed1SBarry Smith return 0; 65139b95ed1SBarry Smith } 65239b95ed1SBarry Smith 65339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 65439b95ed1SBarry Smith { 65539b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 65639b95ed1SBarry Smith Scalar *xg,*zg; 65739b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 65839b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 65939b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 66039b95ed1SBarry Smith int j,n,ierr; 66139b95ed1SBarry Smith 66239b95ed1SBarry Smith ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 66339b95ed1SBarry Smith ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 66439b95ed1SBarry Smith 66539b95ed1SBarry Smith idx = a->j; 66639b95ed1SBarry Smith v = a->a; 66739b95ed1SBarry Smith ii = a->i; 66839b95ed1SBarry Smith 669119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 670119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 671119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 672119a934aSSatish Balay for ( j=0; j<n; j++ ) { 673119a934aSSatish Balay xb = x + 4*(*idx++); 674119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 675119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 676119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 677119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 678119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 679119a934aSSatish Balay v += 16; 680119a934aSSatish Balay } 6811a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 682119a934aSSatish Balay z += 4; 683119a934aSSatish Balay } 68439b95ed1SBarry Smith ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 68539b95ed1SBarry Smith ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 68639b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 68739b95ed1SBarry Smith return 0; 68839b95ed1SBarry Smith } 68939b95ed1SBarry Smith 69039b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 69139b95ed1SBarry Smith { 69239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 69339b95ed1SBarry Smith Scalar *xg,*zg; 69439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 69539b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 69639b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 69739b95ed1SBarry Smith 69839b95ed1SBarry Smith ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 69939b95ed1SBarry Smith ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 70039b95ed1SBarry Smith 70139b95ed1SBarry Smith idx = a->j; 70239b95ed1SBarry Smith v = a->a; 70339b95ed1SBarry Smith ii = a->i; 70439b95ed1SBarry Smith 705119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 706119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 707119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 708119a934aSSatish Balay for ( j=0; j<n; j++ ) { 709119a934aSSatish Balay xb = x + 5*(*idx++); 710119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 711119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 712119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 713119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 714119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 715119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 716119a934aSSatish Balay v += 25; 717119a934aSSatish Balay } 7181a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 719119a934aSSatish Balay z += 5; 720119a934aSSatish Balay } 72139b95ed1SBarry Smith ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 72239b95ed1SBarry Smith ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 72339b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 72439b95ed1SBarry Smith return 0; 72539b95ed1SBarry Smith } 72639b95ed1SBarry Smith 72739b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 72839b95ed1SBarry Smith { 72939b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 73039b95ed1SBarry Smith Scalar *xg,*zg; 73139b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 73239b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 7331a6a6d98SBarry Smith int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 73439b95ed1SBarry Smith 73539b95ed1SBarry Smith ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 73639b95ed1SBarry Smith ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 73739b95ed1SBarry Smith 73839b95ed1SBarry Smith idx = a->j; 73939b95ed1SBarry Smith v = a->a; 74039b95ed1SBarry Smith ii = a->i; 74139b95ed1SBarry Smith 74239b95ed1SBarry Smith 743119a934aSSatish Balay if (!a->mult_work) { 7443b547af2SSatish Balay k = PetscMax(a->m,a->n); 74539b95ed1SBarry Smith a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 746119a934aSSatish Balay } 747119a934aSSatish Balay work = a->mult_work; 748119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 749119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 750119a934aSSatish Balay ncols = n*bs; 751119a934aSSatish Balay workt = work; 752119a934aSSatish Balay for ( j=0; j<n; j++ ) { 753119a934aSSatish Balay xb = x + bs*(*idx++); 754119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 755119a934aSSatish Balay workt += bs; 756119a934aSSatish Balay } 7571a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 758119a934aSSatish Balay v += n*bs2; 759119a934aSSatish Balay z += bs; 760119a934aSSatish Balay } 7610419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 7620419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 7631a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 764bb42667fSSatish Balay return 0; 765bb42667fSSatish Balay } 766bb42667fSSatish Balay 767f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 768f44d3a6dSSatish Balay { 769f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 770f44d3a6dSSatish Balay Scalar *xg,*yg,*zg; 771f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 772f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,n,ierr; 773f44d3a6dSSatish Balay 774f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 775f44d3a6dSSatish Balay ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 776f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 777f44d3a6dSSatish Balay 778f44d3a6dSSatish Balay idx = a->j; 779f44d3a6dSSatish Balay v = a->a; 780f44d3a6dSSatish Balay ii = a->i; 781f44d3a6dSSatish Balay 782f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 783f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 784f44d3a6dSSatish Balay sum = y[i]; 785f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 786f44d3a6dSSatish Balay z[i] = sum; 787f44d3a6dSSatish Balay } 788f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 789f44d3a6dSSatish Balay ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 790f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 791f44d3a6dSSatish Balay PLogFlops(2*a->nz); 792f44d3a6dSSatish Balay return 0; 793f44d3a6dSSatish Balay } 794f44d3a6dSSatish Balay 795f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 796f44d3a6dSSatish Balay { 797f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798f44d3a6dSSatish Balay Scalar *xg,*yg,*zg; 799f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 800f44d3a6dSSatish Balay register Scalar x1,x2; 801f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 802f44d3a6dSSatish Balay int j,n,ierr; 803f44d3a6dSSatish Balay 804f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 805f44d3a6dSSatish Balay ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 806f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 807f44d3a6dSSatish Balay 808f44d3a6dSSatish Balay idx = a->j; 809f44d3a6dSSatish Balay v = a->a; 810f44d3a6dSSatish Balay ii = a->i; 811f44d3a6dSSatish Balay 812f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 813f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 814f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 815f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 816f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 817f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 818f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 819f44d3a6dSSatish Balay v += 4; 820f44d3a6dSSatish Balay } 821f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 822f44d3a6dSSatish Balay z += 2; y += 2; 823f44d3a6dSSatish Balay } 824f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 825f44d3a6dSSatish Balay ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 826f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 827f44d3a6dSSatish Balay PLogFlops(4*a->nz); 828f44d3a6dSSatish Balay return 0; 829f44d3a6dSSatish Balay } 830f44d3a6dSSatish Balay 831f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 832f44d3a6dSSatish Balay { 833f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 834f44d3a6dSSatish Balay Scalar *xg,*yg,*zg; 835f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 836f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 837f44d3a6dSSatish Balay 838f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 839f44d3a6dSSatish Balay ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 840f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 841f44d3a6dSSatish Balay 842f44d3a6dSSatish Balay idx = a->j; 843f44d3a6dSSatish Balay v = a->a; 844f44d3a6dSSatish Balay ii = a->i; 845f44d3a6dSSatish Balay 846f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 847f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 848f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 849f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 850f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 851f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 852f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 853f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 854f44d3a6dSSatish Balay v += 9; 855f44d3a6dSSatish Balay } 856f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 857f44d3a6dSSatish Balay z += 3; y += 3; 858f44d3a6dSSatish Balay } 859f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 860f44d3a6dSSatish Balay ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 861f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 862f44d3a6dSSatish Balay PLogFlops(18*a->nz); 863f44d3a6dSSatish Balay return 0; 864f44d3a6dSSatish Balay } 865f44d3a6dSSatish Balay 866f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 867f44d3a6dSSatish Balay { 868f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 869f44d3a6dSSatish Balay Scalar *xg,*yg,*zg; 870f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 871f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 872f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 873f44d3a6dSSatish Balay int j,n,ierr; 874f44d3a6dSSatish Balay 875f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 876f44d3a6dSSatish Balay ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 877f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 878f44d3a6dSSatish Balay 879f44d3a6dSSatish Balay idx = a->j; 880f44d3a6dSSatish Balay v = a->a; 881f44d3a6dSSatish Balay ii = a->i; 882f44d3a6dSSatish Balay 883f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 884f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 885f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 886f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 887f44d3a6dSSatish Balay xb = x + 4*(*idx++); 888f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 889f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 890f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 891f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 892f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 893f44d3a6dSSatish Balay v += 16; 894f44d3a6dSSatish Balay } 895f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 896f44d3a6dSSatish Balay z += 4; y += 4; 897f44d3a6dSSatish Balay } 898f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 899f44d3a6dSSatish Balay ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 900f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 901f44d3a6dSSatish Balay PLogFlops(32*a->nz); 902f44d3a6dSSatish Balay return 0; 903f44d3a6dSSatish Balay } 904f44d3a6dSSatish Balay 905f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 906f44d3a6dSSatish Balay { 907f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 908f44d3a6dSSatish Balay Scalar *xg,*yg,*zg; 909f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 910f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 911f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 912f44d3a6dSSatish Balay 913f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 914f44d3a6dSSatish Balay ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 915f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 916f44d3a6dSSatish Balay 917f44d3a6dSSatish Balay idx = a->j; 918f44d3a6dSSatish Balay v = a->a; 919f44d3a6dSSatish Balay ii = a->i; 920f44d3a6dSSatish Balay 921f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 922f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 923f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 924f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 925f44d3a6dSSatish Balay xb = x + 5*(*idx++); 926f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 927f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 928f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 929f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 930f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 931f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 932f44d3a6dSSatish Balay v += 25; 933f44d3a6dSSatish Balay } 934f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 935f44d3a6dSSatish Balay z += 5; y += 5; 936f44d3a6dSSatish Balay } 937f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 938f44d3a6dSSatish Balay ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 939f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 940f44d3a6dSSatish Balay PLogFlops(50*a->nz); 941f44d3a6dSSatish Balay return 0; 942f44d3a6dSSatish Balay } 943f44d3a6dSSatish Balay 944f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 945f44d3a6dSSatish Balay { 946f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 947f44d3a6dSSatish Balay Scalar *xg,*zg; 948f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 949f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 950f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 951f44d3a6dSSatish Balay 952f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 953f44d3a6dSSatish Balay 954f44d3a6dSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 955f44d3a6dSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 956f44d3a6dSSatish Balay 957f44d3a6dSSatish Balay idx = a->j; 958f44d3a6dSSatish Balay v = a->a; 959f44d3a6dSSatish Balay ii = a->i; 960f44d3a6dSSatish Balay 961f44d3a6dSSatish Balay 962f44d3a6dSSatish Balay if (!a->mult_work) { 963f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 964f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 965f44d3a6dSSatish Balay } 966f44d3a6dSSatish Balay work = a->mult_work; 967f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 968f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 969f44d3a6dSSatish Balay ncols = n*bs; 970f44d3a6dSSatish Balay workt = work; 971f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 972f44d3a6dSSatish Balay xb = x + bs*(*idx++); 973f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 974f44d3a6dSSatish Balay workt += bs; 975f44d3a6dSSatish Balay } 976f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 977f44d3a6dSSatish Balay v += n*bs2; 978f44d3a6dSSatish Balay z += bs; 979f44d3a6dSSatish Balay } 980f44d3a6dSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 981f44d3a6dSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 982f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 983f44d3a6dSSatish Balay return 0; 984f44d3a6dSSatish Balay } 985f44d3a6dSSatish Balay 9861a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 987bb42667fSSatish Balay { 988bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 9891a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 990bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 991bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 9929df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 993119a934aSSatish Balay 994119a934aSSatish Balay 995bb42667fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 996bb42667fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 997bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 998bb42667fSSatish Balay 999119a934aSSatish Balay idx = a->j; 1000119a934aSSatish Balay v = a->a; 1001119a934aSSatish Balay ii = a->i; 1002119a934aSSatish Balay 1003119a934aSSatish Balay switch (bs) { 1004119a934aSSatish Balay case 1: 1005119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1006119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1007119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1008119a934aSSatish Balay ib = idx + ai[i]; 1009119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1010bb1453f0SSatish Balay rval = ib[j]; 1011bb1453f0SSatish Balay z[rval] += *v++ * x1; 1012119a934aSSatish Balay } 1013119a934aSSatish Balay } 1014119a934aSSatish Balay break; 1015119a934aSSatish Balay case 2: 1016119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1017119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1018119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1019119a934aSSatish Balay ib = idx + ai[i]; 1020119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1021119a934aSSatish Balay rval = ib[j]*2; 1022bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1023bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1024119a934aSSatish Balay v += 4; 1025119a934aSSatish Balay } 1026119a934aSSatish Balay } 1027119a934aSSatish Balay break; 1028119a934aSSatish Balay case 3: 1029119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1030119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1031119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1032119a934aSSatish Balay ib = idx + ai[i]; 1033119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1034119a934aSSatish Balay rval = ib[j]*3; 1035bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1036bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1037bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1038119a934aSSatish Balay v += 9; 1039119a934aSSatish Balay } 1040119a934aSSatish Balay } 1041119a934aSSatish Balay break; 1042119a934aSSatish Balay case 4: 1043119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1044119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1045119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1046119a934aSSatish Balay ib = idx + ai[i]; 1047119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1048119a934aSSatish Balay rval = ib[j]*4; 1049bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1050bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1051bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1052bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1053119a934aSSatish Balay v += 16; 1054119a934aSSatish Balay } 1055119a934aSSatish Balay } 1056119a934aSSatish Balay break; 1057119a934aSSatish Balay case 5: 1058119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1059119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1060119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1061119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1062119a934aSSatish Balay ib = idx + ai[i]; 1063119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1064119a934aSSatish Balay rval = ib[j]*5; 1065bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1066bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1067bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1068bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1069bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1070119a934aSSatish Balay v += 25; 1071119a934aSSatish Balay } 1072119a934aSSatish Balay } 1073119a934aSSatish Balay break; 1074119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1075119a934aSSatish Balay default: { 1076119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1077119a934aSSatish Balay if (!a->mult_work) { 10783b547af2SSatish Balay k = PetscMax(a->m,a->n); 1079bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1080119a934aSSatish Balay CHKPTRQ(a->mult_work); 1081119a934aSSatish Balay } 1082119a934aSSatish Balay work = a->mult_work; 1083119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1084119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1085119a934aSSatish Balay ncols = n*bs; 1086119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1087119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1088119a934aSSatish Balay v += n*bs2; 1089119a934aSSatish Balay x += bs; 1090119a934aSSatish Balay workt = work; 1091119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1092119a934aSSatish Balay zb = z + bs*(*idx++); 1093bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1094119a934aSSatish Balay workt += bs; 1095119a934aSSatish Balay } 1096119a934aSSatish Balay } 1097119a934aSSatish Balay } 1098119a934aSSatish Balay } 10990419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 11000419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1101*faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1102*faf6580fSSatish Balay return 0; 1103*faf6580fSSatish Balay } 1104*faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1105*faf6580fSSatish Balay { 1106*faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1107*faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1108*faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1109*faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1110*faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1111*faf6580fSSatish Balay 1112*faf6580fSSatish Balay 1113*faf6580fSSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1114*faf6580fSSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1115*faf6580fSSatish Balay 1116*faf6580fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1117*faf6580fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1118*faf6580fSSatish Balay 1119*faf6580fSSatish Balay 1120*faf6580fSSatish Balay idx = a->j; 1121*faf6580fSSatish Balay v = a->a; 1122*faf6580fSSatish Balay ii = a->i; 1123*faf6580fSSatish Balay 1124*faf6580fSSatish Balay switch (bs) { 1125*faf6580fSSatish Balay case 1: 1126*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1127*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1128*faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1129*faf6580fSSatish Balay ib = idx + ai[i]; 1130*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1131*faf6580fSSatish Balay rval = ib[j]; 1132*faf6580fSSatish Balay z[rval] += *v++ * x1; 1133*faf6580fSSatish Balay } 1134*faf6580fSSatish Balay } 1135*faf6580fSSatish Balay break; 1136*faf6580fSSatish Balay case 2: 1137*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1138*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1139*faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1140*faf6580fSSatish Balay ib = idx + ai[i]; 1141*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1142*faf6580fSSatish Balay rval = ib[j]*2; 1143*faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1144*faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1145*faf6580fSSatish Balay v += 4; 1146*faf6580fSSatish Balay } 1147*faf6580fSSatish Balay } 1148*faf6580fSSatish Balay break; 1149*faf6580fSSatish Balay case 3: 1150*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1151*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1152*faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1153*faf6580fSSatish Balay ib = idx + ai[i]; 1154*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1155*faf6580fSSatish Balay rval = ib[j]*3; 1156*faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1157*faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1158*faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1159*faf6580fSSatish Balay v += 9; 1160*faf6580fSSatish Balay } 1161*faf6580fSSatish Balay } 1162*faf6580fSSatish Balay break; 1163*faf6580fSSatish Balay case 4: 1164*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1165*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1166*faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1167*faf6580fSSatish Balay ib = idx + ai[i]; 1168*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1169*faf6580fSSatish Balay rval = ib[j]*4; 1170*faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1171*faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1172*faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1173*faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1174*faf6580fSSatish Balay v += 16; 1175*faf6580fSSatish Balay } 1176*faf6580fSSatish Balay } 1177*faf6580fSSatish Balay break; 1178*faf6580fSSatish Balay case 5: 1179*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1180*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1181*faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1182*faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1183*faf6580fSSatish Balay ib = idx + ai[i]; 1184*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1185*faf6580fSSatish Balay rval = ib[j]*5; 1186*faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1187*faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1188*faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1189*faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1190*faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1191*faf6580fSSatish Balay v += 25; 1192*faf6580fSSatish Balay } 1193*faf6580fSSatish Balay } 1194*faf6580fSSatish Balay break; 1195*faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1196*faf6580fSSatish Balay default: { 1197*faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1198*faf6580fSSatish Balay if (!a->mult_work) { 1199*faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1200*faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1201*faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1202*faf6580fSSatish Balay } 1203*faf6580fSSatish Balay work = a->mult_work; 1204*faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1205*faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1206*faf6580fSSatish Balay ncols = n*bs; 1207*faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1208*faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1209*faf6580fSSatish Balay v += n*bs2; 1210*faf6580fSSatish Balay x += bs; 1211*faf6580fSSatish Balay workt = work; 1212*faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1213*faf6580fSSatish Balay zb = z + bs*(*idx++); 1214*faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1215*faf6580fSSatish Balay workt += bs; 1216*faf6580fSSatish Balay } 1217*faf6580fSSatish Balay } 1218*faf6580fSSatish Balay } 1219*faf6580fSSatish Balay } 1220*faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1221*faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1222*faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 12232593348eSBarry Smith return 0; 12242593348eSBarry Smith } 12252593348eSBarry Smith 1226de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 12272593348eSBarry Smith { 1228b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 12299df24120SSatish Balay if (nz) *nz = a->bs2*a->nz; 1230bcd2baecSBarry Smith if (nza) *nza = a->maxnz; 1231bcd2baecSBarry Smith if (mem) *mem = (int)A->mem; 12322593348eSBarry Smith return 0; 12332593348eSBarry Smith } 12342593348eSBarry Smith 123591d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 123691d316f6SSatish Balay { 123791d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 123891d316f6SSatish Balay 123991d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 124091d316f6SSatish Balay 124191d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 124291d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1243a7c10996SSatish Balay (a->nz != b->nz)) { 124491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 124591d316f6SSatish Balay } 124691d316f6SSatish Balay 124791d316f6SSatish Balay /* if the a->i are the same */ 124891d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 124991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 125091d316f6SSatish Balay } 125191d316f6SSatish Balay 125291d316f6SSatish Balay /* if a->j are the same */ 125391d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 125491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 125591d316f6SSatish Balay } 125691d316f6SSatish Balay 125791d316f6SSatish Balay /* if a->a are the same */ 125891d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 125991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 126091d316f6SSatish Balay } 126191d316f6SSatish Balay *flg = PETSC_TRUE; 126291d316f6SSatish Balay return 0; 126391d316f6SSatish Balay 126491d316f6SSatish Balay } 126591d316f6SSatish Balay 126691d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 126791d316f6SSatish Balay { 126891d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 12697e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 127017e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 127117e48fc4SSatish Balay 127217e48fc4SSatish Balay bs = a->bs; 127317e48fc4SSatish Balay aa = a->a; 127417e48fc4SSatish Balay ai = a->i; 127517e48fc4SSatish Balay aj = a->j; 127617e48fc4SSatish Balay ambs = a->mbs; 12779df24120SSatish Balay bs2 = a->bs2; 127891d316f6SSatish Balay 127991d316f6SSatish Balay VecSet(&zero,v); 128091d316f6SSatish Balay VecGetArray(v,&x); VecGetLocalSize(v,&n); 12819867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 128217e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 128317e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 128417e48fc4SSatish Balay if (aj[j] == i) { 128517e48fc4SSatish Balay row = i*bs; 12867e67e3f9SSatish Balay aa_j = aa+j*bs2; 12877e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 128891d316f6SSatish Balay break; 128991d316f6SSatish Balay } 129091d316f6SSatish Balay } 129191d316f6SSatish Balay } 129291d316f6SSatish Balay return 0; 129391d316f6SSatish Balay } 129491d316f6SSatish Balay 12959867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 12969867e207SSatish Balay { 12979867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 12989867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 12997e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 13009867e207SSatish Balay 13019867e207SSatish Balay ai = a->i; 13029867e207SSatish Balay aj = a->j; 13039867e207SSatish Balay aa = a->a; 13049867e207SSatish Balay m = a->m; 13059867e207SSatish Balay n = a->n; 13069867e207SSatish Balay bs = a->bs; 13079867e207SSatish Balay mbs = a->mbs; 13089df24120SSatish Balay bs2 = a->bs2; 13099867e207SSatish Balay if (ll) { 13109867e207SSatish Balay VecGetArray(ll,&l); VecGetSize(ll,&lm); 13119867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 13129867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 13139867e207SSatish Balay M = ai[i+1] - ai[i]; 13149867e207SSatish Balay li = l + i*bs; 13157e67e3f9SSatish Balay v = aa + bs2*ai[i]; 13169867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 13177e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 13189867e207SSatish Balay (*v++) *= li[k%bs]; 13199867e207SSatish Balay } 13209867e207SSatish Balay } 13219867e207SSatish Balay } 13229867e207SSatish Balay } 13239867e207SSatish Balay 13249867e207SSatish Balay if (rr) { 13259867e207SSatish Balay VecGetArray(rr,&r); VecGetSize(rr,&rn); 13269867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 13279867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 13289867e207SSatish Balay M = ai[i+1] - ai[i]; 13297e67e3f9SSatish Balay v = aa + bs2*ai[i]; 13309867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 13319867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 13329867e207SSatish Balay for ( k=0; k<bs; k++ ) { 13339867e207SSatish Balay x = ri[k]; 13349867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 13359867e207SSatish Balay } 13369867e207SSatish Balay } 13379867e207SSatish Balay } 13389867e207SSatish Balay } 13399867e207SSatish Balay return 0; 13409867e207SSatish Balay } 13419867e207SSatish Balay 13429867e207SSatish Balay 1343b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1344b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 13452a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1346736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1347736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 13481a6a6d98SBarry Smith 13491a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 13501a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 13511a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 13521a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 13531a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 13541a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 13551a6a6d98SBarry Smith 13567fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 13577fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 13587fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 13597fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 13607fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 13617fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 13622593348eSBarry Smith 1363b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 13642593348eSBarry Smith { 1365b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 13662593348eSBarry Smith Scalar *v = a->a; 13672593348eSBarry Smith double sum = 0.0; 13689df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 13692593348eSBarry Smith 13702593348eSBarry Smith if (type == NORM_FROBENIUS) { 13710419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 13722593348eSBarry Smith #if defined(PETSC_COMPLEX) 13732593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 13742593348eSBarry Smith #else 13752593348eSBarry Smith sum += (*v)*(*v); v++; 13762593348eSBarry Smith #endif 13772593348eSBarry Smith } 13782593348eSBarry Smith *norm = sqrt(sum); 13792593348eSBarry Smith } 13802593348eSBarry Smith else { 1381b6490206SBarry Smith SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 13822593348eSBarry Smith } 13832593348eSBarry Smith return 0; 13842593348eSBarry Smith } 13852593348eSBarry Smith 13862593348eSBarry Smith /* 13872593348eSBarry Smith note: This can only work for identity for row and col. It would 13882593348eSBarry Smith be good to check this and otherwise generate an error. 13892593348eSBarry Smith */ 1390b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 13912593348eSBarry Smith { 1392b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 13932593348eSBarry Smith Mat outA; 1394de6a44a3SBarry Smith int ierr; 13952593348eSBarry Smith 1396b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 13972593348eSBarry Smith 13982593348eSBarry Smith outA = inA; 13992593348eSBarry Smith inA->factor = FACTOR_LU; 14002593348eSBarry Smith a->row = row; 14012593348eSBarry Smith a->col = col; 14022593348eSBarry Smith 1403de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 14042593348eSBarry Smith 14052593348eSBarry Smith if (!a->diag) { 1406b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 14072593348eSBarry Smith } 14087fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 14092593348eSBarry Smith return 0; 14102593348eSBarry Smith } 14112593348eSBarry Smith 1412b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 14132593348eSBarry Smith { 1414b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 14159df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1416b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1417b6490206SBarry Smith PLogFlops(totalnz); 14182593348eSBarry Smith return 0; 14192593348eSBarry Smith } 14202593348eSBarry Smith 14217e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 14227e67e3f9SSatish Balay { 14237e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14247e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1425a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 14269df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 14277e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 14287e67e3f9SSatish Balay 14297e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 14307e67e3f9SSatish Balay row = im[k]; brow = row/bs; 14317e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 14327e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1433a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 14347e67e3f9SSatish Balay nrow = ailen[brow]; 14357e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 14367e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 14377e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1438a7c10996SSatish Balay col = in[l] ; 14397e67e3f9SSatish Balay bcol = col/bs; 14407e67e3f9SSatish Balay cidx = col%bs; 14417e67e3f9SSatish Balay ridx = row%bs; 14427e67e3f9SSatish Balay high = nrow; 14437e67e3f9SSatish Balay low = 0; /* assume unsorted */ 14447e67e3f9SSatish Balay while (high-low > 5) { 14457e67e3f9SSatish Balay t = (low+high)/2; 14467e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 14477e67e3f9SSatish Balay else low = t; 14487e67e3f9SSatish Balay } 14497e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 14507e67e3f9SSatish Balay if (rp[i] > bcol) break; 14517e67e3f9SSatish Balay if (rp[i] == bcol) { 14527e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 14537e67e3f9SSatish Balay goto finished; 14547e67e3f9SSatish Balay } 14557e67e3f9SSatish Balay } 14567e67e3f9SSatish Balay *v++ = zero; 14577e67e3f9SSatish Balay finished:; 14587e67e3f9SSatish Balay } 14597e67e3f9SSatish Balay } 14607e67e3f9SSatish Balay return 0; 14617e67e3f9SSatish Balay } 14627e67e3f9SSatish Balay 14632593348eSBarry Smith /* -------------------------------------------------------------------*/ 1464cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 14659867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1466f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1467*faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 14681a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 1469b6490206SBarry Smith 0,0, 1470de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1471b6490206SBarry Smith 0, 1472f2501298SSatish Balay MatTranspose_SeqBAIJ, 147317e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 14749867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1475584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1476b6490206SBarry Smith 0, 1477b6490206SBarry Smith MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1478b6490206SBarry Smith MatGetReordering_SeqBAIJ, 14797fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1480b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1481de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1482b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 1483736121d4SSatish Balay MatGetSubMatrix_SeqBAIJ,0, 1484b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1485b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1486af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 14877e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 14881a6a6d98SBarry Smith 0,MatScale_SeqBAIJ, 1489b6490206SBarry Smith 0}; 14902593348eSBarry Smith 14912593348eSBarry Smith /*@C 149244cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 149344cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 149444cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 14952593348eSBarry Smith (or nzz). By setting these parameters accurately, performance can be 14962593348eSBarry Smith increased by more than a factor of 50. 14972593348eSBarry Smith 14982593348eSBarry Smith Input Parameters: 14992593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1500b6490206SBarry Smith . bs - size of block 15012593348eSBarry Smith . m - number of rows 15022593348eSBarry Smith . n - number of columns 1503b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1504b6490206SBarry Smith . nzz - number of block nonzeros per block row or PETSC_NULL 1505b6490206SBarry Smith (possibly different for each row) 15062593348eSBarry Smith 15072593348eSBarry Smith Output Parameter: 15082593348eSBarry Smith . A - the matrix 15092593348eSBarry Smith 15102593348eSBarry Smith Notes: 151144cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 15122593348eSBarry Smith storage. That is, the stored row and column indices can begin at 151344cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 15142593348eSBarry Smith 15152593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 15162593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 15172593348eSBarry Smith allocation. For additional details, see the users manual chapter on 15182593348eSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 15192593348eSBarry Smith 152044cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 15212593348eSBarry Smith @*/ 1522b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 15232593348eSBarry Smith { 15242593348eSBarry Smith Mat B; 1525b6490206SBarry Smith Mat_SeqBAIJ *b; 1526f2501298SSatish Balay int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1527b6490206SBarry Smith 1528f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1529f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 15302593348eSBarry Smith 15312593348eSBarry Smith *A = 0; 1532b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 15332593348eSBarry Smith PLogObjectCreate(B); 1534b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 153544cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 15362593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 15371a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 15381a6a6d98SBarry Smith if (!flg) { 15397fc0212eSBarry Smith switch (bs) { 15407fc0212eSBarry Smith case 1: 15417fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 15421a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 154339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 1544f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 15457fc0212eSBarry Smith break; 15464eeb42bcSBarry Smith case 2: 15474eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 15481a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 154939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 1550f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 15516d84be18SBarry Smith break; 1552f327dd97SBarry Smith case 3: 1553f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 15541a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 155539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 1556f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 15574eeb42bcSBarry Smith break; 15586d84be18SBarry Smith case 4: 15596d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 15601a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 156139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 1562f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 15636d84be18SBarry Smith break; 15646d84be18SBarry Smith case 5: 15656d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 15661a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 156739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 1568f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 15696d84be18SBarry Smith break; 15707fc0212eSBarry Smith } 15711a6a6d98SBarry Smith } 1572b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1573b6490206SBarry Smith B->view = MatView_SeqBAIJ; 15742593348eSBarry Smith B->factor = 0; 15752593348eSBarry Smith B->lupivotthreshold = 1.0; 15762593348eSBarry Smith b->row = 0; 15772593348eSBarry Smith b->col = 0; 15782593348eSBarry Smith b->reallocs = 0; 15792593348eSBarry Smith 158044cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 158144cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1582b6490206SBarry Smith b->mbs = mbs; 1583f2501298SSatish Balay b->nbs = nbs; 1584b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 15852593348eSBarry Smith if (nnz == PETSC_NULL) { 1586119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 15872593348eSBarry Smith else if (nz <= 0) nz = 1; 1588b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1589b6490206SBarry Smith nz = nz*mbs; 15902593348eSBarry Smith } 15912593348eSBarry Smith else { 15922593348eSBarry Smith nz = 0; 1593b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 15942593348eSBarry Smith } 15952593348eSBarry Smith 15962593348eSBarry Smith /* allocate the matrix space */ 15977e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 15982593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 15997e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 16007e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 16012593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 16022593348eSBarry Smith b->i = b->j + nz; 16032593348eSBarry Smith b->singlemalloc = 1; 16042593348eSBarry Smith 1605de6a44a3SBarry Smith b->i[0] = 0; 1606b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 16072593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 16082593348eSBarry Smith } 16092593348eSBarry Smith 1610b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1611b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1612b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1613b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 16142593348eSBarry Smith 1615b6490206SBarry Smith b->bs = bs; 16169df24120SSatish Balay b->bs2 = bs2; 1617b6490206SBarry Smith b->mbs = mbs; 16182593348eSBarry Smith b->nz = 0; 161918e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 16202593348eSBarry Smith b->sorted = 0; 16212593348eSBarry Smith b->roworiented = 1; 16222593348eSBarry Smith b->nonew = 0; 16232593348eSBarry Smith b->diag = 0; 16242593348eSBarry Smith b->solve_work = 0; 1625de6a44a3SBarry Smith b->mult_work = 0; 16262593348eSBarry Smith b->spptr = 0; 16272593348eSBarry Smith *A = B; 16282593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 16292593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 16302593348eSBarry Smith return 0; 16312593348eSBarry Smith } 16322593348eSBarry Smith 1633b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 16342593348eSBarry Smith { 16352593348eSBarry Smith Mat C; 1636b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 16379df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1638de6a44a3SBarry Smith 1639de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 16402593348eSBarry Smith 16412593348eSBarry Smith *B = 0; 1642b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 16432593348eSBarry Smith PLogObjectCreate(C); 1644b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 16452593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1646b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1647b6490206SBarry Smith C->view = MatView_SeqBAIJ; 16482593348eSBarry Smith C->factor = A->factor; 16492593348eSBarry Smith c->row = 0; 16502593348eSBarry Smith c->col = 0; 16512593348eSBarry Smith C->assembled = PETSC_TRUE; 16522593348eSBarry Smith 165344cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 165444cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 165544cd7ae7SLois Curfman McInnes C->M = a->m; 165644cd7ae7SLois Curfman McInnes C->N = a->n; 165744cd7ae7SLois Curfman McInnes 1658b6490206SBarry Smith c->bs = a->bs; 16599df24120SSatish Balay c->bs2 = a->bs2; 1660b6490206SBarry Smith c->mbs = a->mbs; 1661b6490206SBarry Smith c->nbs = a->nbs; 16622593348eSBarry Smith 1663b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1664b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1665b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 16662593348eSBarry Smith c->imax[i] = a->imax[i]; 16672593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16682593348eSBarry Smith } 16692593348eSBarry Smith 16702593348eSBarry Smith /* allocate the matrix space */ 16712593348eSBarry Smith c->singlemalloc = 1; 16727e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 16732593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 16747e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1675de6a44a3SBarry Smith c->i = c->j + nz; 1676b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1677b6490206SBarry Smith if (mbs > 0) { 1678de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 16792593348eSBarry Smith if (cpvalues == COPY_VALUES) { 16807e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 16812593348eSBarry Smith } 16822593348eSBarry Smith } 16832593348eSBarry Smith 1684b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 16852593348eSBarry Smith c->sorted = a->sorted; 16862593348eSBarry Smith c->roworiented = a->roworiented; 16872593348eSBarry Smith c->nonew = a->nonew; 16882593348eSBarry Smith 16892593348eSBarry Smith if (a->diag) { 1690b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1691b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1692b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 16932593348eSBarry Smith c->diag[i] = a->diag[i]; 16942593348eSBarry Smith } 16952593348eSBarry Smith } 16962593348eSBarry Smith else c->diag = 0; 16972593348eSBarry Smith c->nz = a->nz; 16982593348eSBarry Smith c->maxnz = a->maxnz; 16992593348eSBarry Smith c->solve_work = 0; 17002593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17017fc0212eSBarry Smith c->mult_work = 0; 17022593348eSBarry Smith *B = C; 17032593348eSBarry Smith return 0; 17042593348eSBarry Smith } 17052593348eSBarry Smith 170619bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 17072593348eSBarry Smith { 1708b6490206SBarry Smith Mat_SeqBAIJ *a; 17092593348eSBarry Smith Mat B; 1710de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1711b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 171235aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1713a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 1714b6490206SBarry Smith Scalar *aa; 171519bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 17162593348eSBarry Smith 17171a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1718de6a44a3SBarry Smith bs2 = bs*bs; 1719b6490206SBarry Smith 17202593348eSBarry Smith MPI_Comm_size(comm,&size); 1721b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 172290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 172377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1724de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 17252593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17262593348eSBarry Smith 1727b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 172835aab85fSBarry Smith 172935aab85fSBarry Smith /* 173035aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 173135aab85fSBarry Smith divisible by the blocksize 173235aab85fSBarry Smith */ 1733b6490206SBarry Smith mbs = M/bs; 173435aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 173535aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 173635aab85fSBarry Smith else mbs++; 173735aab85fSBarry Smith if (extra_rows) { 17381a6a6d98SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 173935aab85fSBarry Smith } 1740b6490206SBarry Smith 17412593348eSBarry Smith /* read in row lengths */ 174235aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 174377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 174435aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 17452593348eSBarry Smith 1746b6490206SBarry Smith /* read in column indices */ 174735aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 174877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 174935aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1750b6490206SBarry Smith 1751b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1752b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1753b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 175435aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 175535aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 175635aab85fSBarry Smith masked = mask + mbs; 1757b6490206SBarry Smith rowcount = 0; nzcount = 0; 1758b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 175935aab85fSBarry Smith nmask = 0; 1760b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1761b6490206SBarry Smith kmax = rowlengths[rowcount]; 1762b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 176335aab85fSBarry Smith tmp = jj[nzcount++]/bs; 176435aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1765b6490206SBarry Smith } 1766b6490206SBarry Smith rowcount++; 1767b6490206SBarry Smith } 176835aab85fSBarry Smith browlengths[i] += nmask; 176935aab85fSBarry Smith /* zero out the mask elements we set */ 177035aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1771b6490206SBarry Smith } 1772b6490206SBarry Smith 17732593348eSBarry Smith /* create our matrix */ 177435aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 177535aab85fSBarry Smith CHKERRQ(ierr); 17762593348eSBarry Smith B = *A; 1777b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 17782593348eSBarry Smith 17792593348eSBarry Smith /* set matrix "i" values */ 1780de6a44a3SBarry Smith a->i[0] = 0; 1781b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1782b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1783b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 17842593348eSBarry Smith } 1785b6490206SBarry Smith a->nz = 0; 1786b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 17872593348eSBarry Smith 1788b6490206SBarry Smith /* read in nonzero values */ 178935aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 179077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 179135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1792b6490206SBarry Smith 1793b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1794b6490206SBarry Smith nzcount = 0; jcount = 0; 1795b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 1796b6490206SBarry Smith nzcountb = nzcount; 179735aab85fSBarry Smith nmask = 0; 1798b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1799b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1800b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 180135aab85fSBarry Smith tmp = jj[nzcount++]/bs; 180235aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1803b6490206SBarry Smith } 1804b6490206SBarry Smith rowcount++; 1805b6490206SBarry Smith } 1806de6a44a3SBarry Smith /* sort the masked values */ 180777c4ece6SBarry Smith PetscSortInt(nmask,masked); 1808de6a44a3SBarry Smith 1809b6490206SBarry Smith /* set "j" values into matrix */ 1810b6490206SBarry Smith maskcount = 1; 181135aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 181235aab85fSBarry Smith a->j[jcount++] = masked[j]; 1813de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1814b6490206SBarry Smith } 1815b6490206SBarry Smith /* set "a" values into matrix */ 1816de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1817b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1818b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1819b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 1820de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1821de6a44a3SBarry Smith block = mask[tmp] - 1; 1822de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1823de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1824b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 1825b6490206SBarry Smith } 1826b6490206SBarry Smith } 182735aab85fSBarry Smith /* zero out the mask elements we set */ 182835aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1829b6490206SBarry Smith } 183035aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1831b6490206SBarry Smith 1832b6490206SBarry Smith PetscFree(rowlengths); 1833b6490206SBarry Smith PetscFree(browlengths); 1834b6490206SBarry Smith PetscFree(aa); 1835b6490206SBarry Smith PetscFree(jj); 1836b6490206SBarry Smith PetscFree(mask); 1837b6490206SBarry Smith 1838b6490206SBarry Smith B->assembled = PETSC_TRUE; 1839de6a44a3SBarry Smith 1840de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1841de6a44a3SBarry Smith if (flg) { 184219bcc07fSBarry Smith Viewer tviewer; 184319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 184490ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 184519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 184619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1847de6a44a3SBarry Smith } 1848de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1849de6a44a3SBarry Smith if (flg) { 185019bcc07fSBarry Smith Viewer tviewer; 185119bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 185290ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 185319bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 185419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1855de6a44a3SBarry Smith } 1856de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1857de6a44a3SBarry Smith if (flg) { 185819bcc07fSBarry Smith Viewer tviewer; 185919bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 186019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 186119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1862de6a44a3SBarry Smith } 1863de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1864de6a44a3SBarry Smith if (flg) { 186519bcc07fSBarry Smith Viewer tviewer; 186619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 186790ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 186819bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 186919bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1870de6a44a3SBarry Smith } 1871de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1872de6a44a3SBarry Smith if (flg) { 187319bcc07fSBarry Smith Viewer tviewer; 187419bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 187519bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 187619bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 187719bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1878de6a44a3SBarry Smith } 18792593348eSBarry Smith return 0; 18802593348eSBarry Smith } 18812593348eSBarry Smith 18822593348eSBarry Smith 18832593348eSBarry Smith 1884