1b6490206SBarry Smith 22593348eSBarry Smith #ifndef lint 3*7ddc982cSLois Curfman McInnes static char vcid[] = "$Id: baij.c,v 1.31 1996/04/08 23:51:58 balay Exp curfman $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 10b6490206SBarry Smith #include "baij.h" 112593348eSBarry Smith #include "vec/vecimpl.h" 122593348eSBarry Smith #include "inline/spops.h" 132593348eSBarry Smith #include "petsc.h" 142593348eSBarry Smith 15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 162593348eSBarry Smith 17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 182593348eSBarry Smith { 19b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20bcd2baecSBarry Smith int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 212593348eSBarry Smith 222593348eSBarry Smith /* 232593348eSBarry Smith this is tacky: In the future when we have written special factorization 242593348eSBarry Smith and solve routines for the identity permutation we should use a 252593348eSBarry Smith stride index set instead of the general one. 262593348eSBarry Smith */ 272593348eSBarry Smith if (type == ORDER_NATURAL) { 282593348eSBarry Smith idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 292593348eSBarry Smith for ( i=0; i<n; i++ ) idx[i] = i; 302593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 312593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 322593348eSBarry Smith PetscFree(idx); 332593348eSBarry Smith ISSetPermutation(*rperm); 342593348eSBarry Smith ISSetPermutation(*cperm); 352593348eSBarry Smith ISSetIdentity(*rperm); 362593348eSBarry Smith ISSetIdentity(*cperm); 372593348eSBarry Smith return 0; 382593348eSBarry Smith } 392593348eSBarry Smith 40bcd2baecSBarry Smith MatReorderingRegisterAll(); 41bcd2baecSBarry Smith ishift = a->indexshift; 42bcd2baecSBarry Smith oshift = -MatReorderingIndexShift[(int)type]; 43bcd2baecSBarry Smith if (MatReorderingRequiresSymmetric[(int)type]) { 44bcd2baecSBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45bcd2baecSBarry Smith CHKERRQ(ierr); 46bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 472593348eSBarry Smith PetscFree(ia); PetscFree(ja); 48bcd2baecSBarry Smith } else { 49bcd2baecSBarry Smith if (ishift == oshift) { 50bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51bcd2baecSBarry Smith } 52bcd2baecSBarry Smith else if (ishift == -1) { 53bcd2baecSBarry Smith /* temporarily subtract 1 from i and j indices */ 54bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 55bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 56bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 59bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60bcd2baecSBarry Smith } else { 61bcd2baecSBarry Smith /* temporarily add 1 to i and j indices */ 62bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 63bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 64bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 67bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68bcd2baecSBarry Smith } 69bcd2baecSBarry Smith } 702593348eSBarry Smith return 0; 712593348eSBarry Smith } 722593348eSBarry Smith 73de6a44a3SBarry Smith /* 74de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 75de6a44a3SBarry Smith */ 76de6a44a3SBarry Smith 77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 78de6a44a3SBarry Smith { 79de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 807fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 81de6a44a3SBarry Smith 82de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 847fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 85de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86de6a44a3SBarry Smith if (a->j[j] == i) { 87de6a44a3SBarry Smith diag[i] = j; 88de6a44a3SBarry Smith break; 89de6a44a3SBarry Smith } 90de6a44a3SBarry Smith } 91de6a44a3SBarry Smith } 92de6a44a3SBarry Smith a->diag = diag; 93de6a44a3SBarry Smith return 0; 94de6a44a3SBarry Smith } 952593348eSBarry Smith 962593348eSBarry Smith #include "draw.h" 972593348eSBarry Smith #include "pinclude/pviewer.h" 9877c4ece6SBarry Smith #include "sys.h" 992593348eSBarry Smith 100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 1012593348eSBarry Smith { 102b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1037e67e3f9SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104b6490206SBarry Smith Scalar *aa; 1052593348eSBarry Smith 10690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1072593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1082593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1092593348eSBarry Smith col_lens[1] = a->m; 1102593348eSBarry Smith col_lens[2] = a->n; 1117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1122593348eSBarry Smith 1132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 114b6490206SBarry Smith count = 0; 115b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 116b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 117b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118b6490206SBarry Smith } 1192593348eSBarry Smith } 12077c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1212593348eSBarry Smith PetscFree(col_lens); 1222593348eSBarry Smith 1232593348eSBarry Smith /* store column indices (zero start index) */ 1247e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125b6490206SBarry Smith count = 0; 126b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 127b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 128b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 130b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1312593348eSBarry Smith } 1322593348eSBarry Smith } 133b6490206SBarry Smith } 134b6490206SBarry Smith } 1357e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136b6490206SBarry Smith PetscFree(jj); 1372593348eSBarry Smith 1382593348eSBarry Smith /* store nonzero values */ 1397e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140b6490206SBarry Smith count = 0; 141b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 142b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 143b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 146b6490206SBarry Smith } 147b6490206SBarry Smith } 148b6490206SBarry Smith } 149b6490206SBarry Smith } 1507e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151b6490206SBarry Smith PetscFree(aa); 1522593348eSBarry Smith return 0; 1532593348eSBarry Smith } 1542593348eSBarry Smith 155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1562593348eSBarry Smith { 157b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1587e67e3f9SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 1592593348eSBarry Smith FILE *fd; 1602593348eSBarry Smith char *outputname; 1612593348eSBarry Smith 16290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1632593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 165*7ddc982cSLois Curfman McInnes if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 166*7ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1672593348eSBarry Smith } 16890ace30eSBarry Smith else if (format == ASCII_FORMAT_MATLAB) { 169b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1702593348eSBarry Smith } 17144cd7ae7SLois Curfman McInnes else if (format == ASCII_FORMAT_COMMON) { 17244cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17344cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17444cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17544cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17644cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17744cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17844cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17944cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 18044cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 18144cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18344cd7ae7SLois Curfman McInnes #else 18444cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18544cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18644cd7ae7SLois Curfman McInnes #endif 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19044cd7ae7SLois Curfman McInnes } 19144cd7ae7SLois Curfman McInnes } 19244cd7ae7SLois Curfman McInnes } 1932593348eSBarry Smith else { 194b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 195b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 196b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 197b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 2007e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 20188685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2027e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 20488685aaeSLois Curfman McInnes else { 2057e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20688685aaeSLois Curfman McInnes } 20788685aaeSLois Curfman McInnes #else 2087e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20988685aaeSLois Curfman McInnes #endif 2102593348eSBarry Smith } 2112593348eSBarry Smith } 2122593348eSBarry Smith fprintf(fd,"\n"); 2132593348eSBarry Smith } 2142593348eSBarry Smith } 215b6490206SBarry Smith } 2162593348eSBarry Smith fflush(fd); 2172593348eSBarry Smith return 0; 2182593348eSBarry Smith } 2192593348eSBarry Smith 220b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 2212593348eSBarry Smith { 2222593348eSBarry Smith Mat A = (Mat) obj; 22319bcc07fSBarry Smith ViewerType vtype; 22419bcc07fSBarry Smith int ierr; 2252593348eSBarry Smith 2262593348eSBarry Smith if (!viewer) { 22719bcc07fSBarry Smith viewer = STDOUT_VIEWER_SELF; 2282593348eSBarry Smith } 22919bcc07fSBarry Smith 23019bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 23119bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 232b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 2332593348eSBarry Smith } 23419bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 2362593348eSBarry Smith } 23719bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 238b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 2392593348eSBarry Smith } 24019bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 241b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 2422593348eSBarry Smith } 2432593348eSBarry Smith return 0; 2442593348eSBarry Smith } 245b6490206SBarry Smith 246119a934aSSatish Balay #define CHUNKSIZE 10 247cd0e1443SSatish Balay 248cd0e1443SSatish Balay /* This version has row oriented v */ 249cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250cd0e1443SSatish Balay { 251cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254cd0e1443SSatish Balay int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 2557e67e3f9SSatish Balay int ridx,cidx,bs2=bs*bs; 256cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 257cd0e1443SSatish Balay 258cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 259cd0e1443SSatish Balay row = im[k]; brow = row/bs; 260cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 2627e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 263cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 264cd0e1443SSatish Balay low = 0; 265cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 266cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268cd0e1443SSatish Balay col = in[l] - shift; bcol = col/bs; 269cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 270cd0e1443SSatish Balay if (roworiented) { 271cd0e1443SSatish Balay value = *v++; 272cd0e1443SSatish Balay } 273cd0e1443SSatish Balay else { 274cd0e1443SSatish Balay value = v[k + l*m]; 275cd0e1443SSatish Balay } 276cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 277cd0e1443SSatish Balay while (high-low > 5) { 278cd0e1443SSatish Balay t = (low+high)/2; 279cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 280cd0e1443SSatish Balay else low = t; 281cd0e1443SSatish Balay } 282cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 283cd0e1443SSatish Balay if (rp[i] > bcol) break; 284cd0e1443SSatish Balay if (rp[i] == bcol) { 2857e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 286cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 287cd0e1443SSatish Balay else *bap = value; 288cd0e1443SSatish Balay goto noinsert; 289cd0e1443SSatish Balay } 290cd0e1443SSatish Balay } 291cd0e1443SSatish Balay if (nonew) goto noinsert; 292cd0e1443SSatish Balay if (nrow >= rmax) { 293cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 294cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295cd0e1443SSatish Balay Scalar *new_a; 296cd0e1443SSatish Balay 297cd0e1443SSatish Balay /* malloc new storage space */ 2987e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 3007e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 301cd0e1443SSatish Balay new_i = new_j + new_nz; 302cd0e1443SSatish Balay 303cd0e1443SSatish Balay /* copy over old data into new slots */ 304cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 3057e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 3067e67e3f9SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 307cd0e1443SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 308cd0e1443SSatish Balay PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 309cd0e1443SSatish Balay len*sizeof(int)); 3107e67e3f9SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 3117e67e3f9SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 3127e67e3f9SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 3137e67e3f9SSatish Balay aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 314cd0e1443SSatish Balay /* free up old matrix storage */ 315cd0e1443SSatish Balay PetscFree(a->a); 316cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318cd0e1443SSatish Balay a->singlemalloc = 1; 319cd0e1443SSatish Balay 3207e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 321cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 3227e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323119a934aSSatish Balay a->maxnz += CHUNKSIZE; 324cd0e1443SSatish Balay a->reallocs++; 325119a934aSSatish Balay a->nz++; 326cd0e1443SSatish Balay } 3277e67e3f9SSatish Balay N = nrow++ - 1; 328cd0e1443SSatish Balay /* shift up all the later entries in this row */ 329cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 330cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 3317e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332cd0e1443SSatish Balay } 3337e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334cd0e1443SSatish Balay rp[i] = bcol; 3357e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 336cd0e1443SSatish Balay noinsert:; 337cd0e1443SSatish Balay low = i; 338cd0e1443SSatish Balay } 339cd0e1443SSatish Balay ailen[brow] = nrow; 340cd0e1443SSatish Balay } 341cd0e1443SSatish Balay return 0; 342cd0e1443SSatish Balay } 343cd0e1443SSatish Balay 3440b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 3450b824a48SSatish Balay { 3460b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3470b824a48SSatish Balay *m = a->m; *n = a->n; 3480b824a48SSatish Balay return 0; 3490b824a48SSatish Balay } 3500b824a48SSatish Balay 3510b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 3520b824a48SSatish Balay { 3530b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3540b824a48SSatish Balay *m = 0; *n = a->m; 3550b824a48SSatish Balay return 0; 3560b824a48SSatish Balay } 3570b824a48SSatish Balay 3589867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 3599867e207SSatish Balay { 3609867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3617e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 3629867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 3639867e207SSatish Balay 3649867e207SSatish Balay bs = a->bs; 3659867e207SSatish Balay ai = a->i; 3669867e207SSatish Balay aj = a->j; 3679867e207SSatish Balay aa = a->a; 3687e67e3f9SSatish Balay bs2 = bs*bs; 3699867e207SSatish Balay 3709867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 3719867e207SSatish Balay 3729867e207SSatish Balay bn = row/bs; /* Block number */ 3739867e207SSatish Balay bp = row % bs; /* Block Position */ 3749867e207SSatish Balay M = ai[bn+1] - ai[bn]; 3759867e207SSatish Balay *nz = bs*M; 3769867e207SSatish Balay 3779867e207SSatish Balay if (v) { 3789867e207SSatish Balay *v = 0; 3799867e207SSatish Balay if (*nz) { 3809867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 3819867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3829867e207SSatish Balay v_i = *v + i*bs; 3837e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 3847e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 3859867e207SSatish Balay } 3869867e207SSatish Balay } 3879867e207SSatish Balay } 3889867e207SSatish Balay 3899867e207SSatish Balay if (idx) { 3909867e207SSatish Balay *idx = 0; 3919867e207SSatish Balay if (*nz) { 3929867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 3939867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3949867e207SSatish Balay idx_i = *idx + i*bs; 3959867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 3969867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 3979867e207SSatish Balay } 3989867e207SSatish Balay } 3999867e207SSatish Balay } 4009867e207SSatish Balay return 0; 4019867e207SSatish Balay } 4029867e207SSatish Balay 4039867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4049867e207SSatish Balay { 4059867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 4069867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 4079867e207SSatish Balay return 0; 4089867e207SSatish Balay } 409b6490206SBarry Smith 410f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411f2501298SSatish Balay { 412f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413f2501298SSatish Balay Mat C; 414f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415f2501298SSatish Balay int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 416f2501298SSatish Balay Scalar *array=a->a; 417f2501298SSatish Balay 418f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 419f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 422f2501298SSatish Balay if (shift) { 423f2501298SSatish Balay for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 424f2501298SSatish Balay } 425f2501298SSatish Balay for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 426f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 427f2501298SSatish Balay PetscFree(col); 428f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 429f2501298SSatish Balay cols = rows + bs; 430f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 431f2501298SSatish Balay cols[0] = i*bs; 432f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 433f2501298SSatish Balay len = ai[i+1] - ai[i]; 434f2501298SSatish Balay for ( j=0; j<len; j++ ) { 435f2501298SSatish Balay rows[0] = (*aj++)*bs; 436f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 437f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 438f2501298SSatish Balay array += bs2; 439f2501298SSatish Balay } 440f2501298SSatish Balay } 4411073c447SSatish Balay PetscFree(rows); 442f2501298SSatish Balay if (shift) { 443f2501298SSatish Balay for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 444f2501298SSatish Balay } 445f2501298SSatish Balay 446f2501298SSatish Balay ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 447f2501298SSatish Balay ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 448f2501298SSatish Balay 449f2501298SSatish Balay if (B != PETSC_NULL) { 450f2501298SSatish Balay *B = C; 451f2501298SSatish Balay } else { 452f2501298SSatish Balay /* This isn't really an in-place transpose */ 453f2501298SSatish Balay PetscFree(a->a); 454f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 455f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 456f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 457f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 458f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 459f2501298SSatish Balay PetscFree(a); 460f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 461f2501298SSatish Balay PetscHeaderDestroy(C); 462f2501298SSatish Balay } 463f2501298SSatish Balay return 0; 464f2501298SSatish Balay } 465f2501298SSatish Balay 466f2501298SSatish Balay 467584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 468584200bdSSatish Balay { 469584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 470584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 471584200bdSSatish Balay int m = a->m,*ip, N, *ailen = a->ilen,shift = a->indexshift; 472584200bdSSatish Balay int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 473584200bdSSatish Balay Scalar *aa = a->a, *ap; 474584200bdSSatish Balay 475584200bdSSatish Balay if (mode == FLUSH_ASSEMBLY) return 0; 476584200bdSSatish Balay 477584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 478584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 479584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 480584200bdSSatish Balay if (fshift) { 481584200bdSSatish Balay ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 482584200bdSSatish Balay N = ailen[i]; 483584200bdSSatish Balay for ( j=0; j<N; j++ ) { 484584200bdSSatish Balay ip[j-fshift] = ip[j]; 4857e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 486584200bdSSatish Balay } 487584200bdSSatish Balay } 488584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 489584200bdSSatish Balay } 490584200bdSSatish Balay if (mbs) { 491584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 492584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 493584200bdSSatish Balay } 494584200bdSSatish Balay /* reset ilen and imax for each row */ 495584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 496584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 497584200bdSSatish Balay } 498119a934aSSatish Balay a->nz = ai[mbs] + shift; 499584200bdSSatish Balay 500584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 501584200bdSSatish Balay if (fshift && a->diag) { 502584200bdSSatish Balay PetscFree(a->diag); 503584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 504584200bdSSatish Balay a->diag = 0; 505584200bdSSatish Balay } 506584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 507584200bdSSatish Balay fshift,a->nz,m); 508584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 509584200bdSSatish Balay a->reallocs); 510584200bdSSatish Balay return 0; 511584200bdSSatish Balay } 512584200bdSSatish Balay 513b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 5142593348eSBarry Smith { 515b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 516de6a44a3SBarry Smith PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 5172593348eSBarry Smith return 0; 5182593348eSBarry Smith } 5192593348eSBarry Smith 520b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 5212593348eSBarry Smith { 5222593348eSBarry Smith Mat A = (Mat) obj; 523b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5242593348eSBarry Smith 5252593348eSBarry Smith #if defined(PETSC_LOG) 5262593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 5272593348eSBarry Smith #endif 5282593348eSBarry Smith PetscFree(a->a); 5292593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 5302593348eSBarry Smith if (a->diag) PetscFree(a->diag); 5312593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 5322593348eSBarry Smith if (a->imax) PetscFree(a->imax); 5332593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 534de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 5352593348eSBarry Smith PetscFree(a); 536f2655603SLois Curfman McInnes PLogObjectDestroy(A); 537f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 5382593348eSBarry Smith return 0; 5392593348eSBarry Smith } 5402593348eSBarry Smith 541b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 5422593348eSBarry Smith { 543b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5442593348eSBarry Smith if (op == ROW_ORIENTED) a->roworiented = 1; 5452593348eSBarry Smith else if (op == COLUMN_ORIENTED) a->roworiented = 0; 5462593348eSBarry Smith else if (op == COLUMNS_SORTED) a->sorted = 1; 5472593348eSBarry Smith else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 5482593348eSBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 5492593348eSBarry Smith else if (op == ROWS_SORTED || 5502593348eSBarry Smith op == SYMMETRIC_MATRIX || 5512593348eSBarry Smith op == STRUCTURALLY_SYMMETRIC_MATRIX || 5522593348eSBarry Smith op == YES_NEW_DIAGONALS) 55394a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 5542593348eSBarry Smith else if (op == NO_NEW_DIAGONALS) 555b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 5562593348eSBarry Smith else 557b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 5582593348eSBarry Smith return 0; 5592593348eSBarry Smith } 5602593348eSBarry Smith 5612593348eSBarry Smith 5622593348eSBarry Smith /* -------------------------------------------------------*/ 5632593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 5642593348eSBarry Smith /* -------------------------------------------------------*/ 565b6490206SBarry Smith #include "pinclude/plapack.h" 566b6490206SBarry Smith 567b6490206SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 5682593348eSBarry Smith { 569b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 570b6490206SBarry Smith Scalar *xg,*yg; 571b6490206SBarry Smith register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 572b6490206SBarry Smith register Scalar x1,x2,x3,x4,x5; 573b6490206SBarry Smith int mbs = a->mbs, m = a->m, i, *idx,*ii; 574b6490206SBarry Smith int bs = a->bs,j,n,bs2 = bs*bs; 5752593348eSBarry Smith 576b6490206SBarry Smith VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 577b6490206SBarry Smith PetscMemzero(y,m*sizeof(Scalar)); 578119a934aSSatish Balay /* 579119a934aSSatish Balay x = x; */ 5802593348eSBarry Smith idx = a->j; 5812593348eSBarry Smith v = a->a; 5822593348eSBarry Smith ii = a->i; 583b6490206SBarry Smith 584b6490206SBarry Smith switch (bs) { 585b6490206SBarry Smith case 1: 586119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 5872593348eSBarry Smith n = ii[1] - ii[0]; ii++; 5882593348eSBarry Smith sum = 0.0; 5892593348eSBarry Smith while (n--) sum += *v++ * x[*idx++]; 5902593348eSBarry Smith y[i] = sum; 5912593348eSBarry Smith } 5922593348eSBarry Smith break; 593b6490206SBarry Smith case 2: 594b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 595de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 596b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; 597b6490206SBarry Smith for ( j=0; j<n; j++ ) { 598b6490206SBarry Smith xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 599b6490206SBarry Smith sum1 += v[0]*x1 + v[2]*x2; 600b6490206SBarry Smith sum2 += v[1]*x1 + v[3]*x2; 601b6490206SBarry Smith v += 4; 602b6490206SBarry Smith } 603119a934aSSatish Balay y[0] = sum1; y[1] = sum2; 604b6490206SBarry Smith y += 2; 605b6490206SBarry Smith } 606b6490206SBarry Smith break; 607b6490206SBarry Smith case 3: 608b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 609de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 610b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 611b6490206SBarry Smith for ( j=0; j<n; j++ ) { 612b6490206SBarry Smith xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 613b6490206SBarry Smith sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 614b6490206SBarry Smith sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 615b6490206SBarry Smith sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 616b6490206SBarry Smith v += 9; 617b6490206SBarry Smith } 618119a934aSSatish Balay y[0] = sum1; y[1] = sum2; y[2] = sum3; 619b6490206SBarry Smith y += 3; 620b6490206SBarry Smith } 621b6490206SBarry Smith break; 622b6490206SBarry Smith case 4: 623b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 624de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 625b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 626b6490206SBarry Smith for ( j=0; j<n; j++ ) { 627b6490206SBarry Smith xb = x + 4*(*idx++); 628b6490206SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 629b6490206SBarry Smith sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 630b6490206SBarry Smith sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 631b6490206SBarry Smith sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 632b6490206SBarry Smith sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 633b6490206SBarry Smith v += 16; 634b6490206SBarry Smith } 635119a934aSSatish Balay y[0] = sum1; y[1] = sum2; y[2] = sum3; y[3] = sum4; 636b6490206SBarry Smith y += 4; 637b6490206SBarry Smith } 638b6490206SBarry Smith break; 639b6490206SBarry Smith case 5: 640b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 641de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 642b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 643b6490206SBarry Smith for ( j=0; j<n; j++ ) { 644b6490206SBarry Smith xb = x + 5*(*idx++); 645b6490206SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 646b6490206SBarry Smith sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 647b6490206SBarry Smith sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 648b6490206SBarry Smith sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 649b6490206SBarry Smith sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 650b6490206SBarry Smith sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 651b6490206SBarry Smith v += 25; 652b6490206SBarry Smith } 653119a934aSSatish Balay y[0] = sum1; y[1] = sum2; y[2] = sum3; y[3] = sum4; y[4] = sum5; 654b6490206SBarry Smith y += 5; 655b6490206SBarry Smith } 656b6490206SBarry Smith break; 657b6490206SBarry Smith /* block sizes larger then 5 by 5 are handled by BLAS */ 658b6490206SBarry Smith default: { 659de6a44a3SBarry Smith int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 660de6a44a3SBarry Smith if (!a->mult_work) { 661de6a44a3SBarry Smith a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 662de6a44a3SBarry Smith CHKPTRQ(a->mult_work); 663de6a44a3SBarry Smith } 664de6a44a3SBarry Smith work = a->mult_work; 665b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 666de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 667de6a44a3SBarry Smith ncols = n*bs; 668de6a44a3SBarry Smith workt = work; 669b6490206SBarry Smith for ( j=0; j<n; j++ ) { 670b6490206SBarry Smith xb = x + bs*(*idx++); 671de6a44a3SBarry Smith for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 672de6a44a3SBarry Smith workt += bs; 673b6490206SBarry Smith } 674de6a44a3SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 675de6a44a3SBarry Smith v += n*bs2; 676b6490206SBarry Smith y += bs; 6772593348eSBarry Smith } 6782593348eSBarry Smith } 6792593348eSBarry Smith } 680119a934aSSatish Balay PLogFlops(2*bs2* (a->nz) - m); 681119a934aSSatish Balay return 0; 682119a934aSSatish Balay } 683119a934aSSatish Balay 684119a934aSSatish Balay static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685119a934aSSatish Balay { 686119a934aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 687119a934aSSatish Balay Scalar *xg,*yg,*zg; 688119a934aSSatish Balay register Scalar *x,*y,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 689119a934aSSatish Balay register Scalar x1,x2,x3,x4,x5; 690119a934aSSatish Balay int mbs=a->mbs,m=a->m,i,*idx,*ii; 691119a934aSSatish Balay int bs=a->bs,j,n,bs2=bs*bs; 692119a934aSSatish Balay 693119a934aSSatish Balay VecGetArray(xx,&xg); x = xg; 694119a934aSSatish Balay VecGetArray(yy,&yg); y = yg; 695119a934aSSatish Balay VecGetArray(zz,&zg); z = zg; 696119a934aSSatish Balay 697119a934aSSatish Balay if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); 698119a934aSSatish Balay else if (zz!=yy) PetscMemcpy(z,y,m*sizeof(Scalar)); 699119a934aSSatish Balay 700119a934aSSatish Balay idx = a->j; 701119a934aSSatish Balay v = a->a; 702119a934aSSatish Balay ii = a->i; 703119a934aSSatish Balay 704119a934aSSatish Balay switch (bs) { 705119a934aSSatish Balay case 1: 706119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 707119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 708119a934aSSatish Balay sum = 0.0; 709119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 710119a934aSSatish Balay z[i] += sum; 711119a934aSSatish Balay } 712119a934aSSatish Balay break; 713119a934aSSatish Balay case 2: 714119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 715119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 716119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 717119a934aSSatish Balay for ( j=0; j<n; j++ ) { 718119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 719119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 720119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 721119a934aSSatish Balay v += 4; 722119a934aSSatish Balay } 723119a934aSSatish Balay z[0] += sum1; z[1] += sum2; 724119a934aSSatish Balay z += 2; 725119a934aSSatish Balay } 726119a934aSSatish Balay break; 727119a934aSSatish Balay case 3: 728119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 729119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 730119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 731119a934aSSatish Balay for ( j=0; j<n; j++ ) { 732119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 733119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 734119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 735119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 736119a934aSSatish Balay v += 9; 737119a934aSSatish Balay } 738119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; 739119a934aSSatish Balay z += 3; 740119a934aSSatish Balay } 741119a934aSSatish Balay break; 742119a934aSSatish Balay case 4: 743119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 744119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 745119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 746119a934aSSatish Balay for ( j=0; j<n; j++ ) { 747119a934aSSatish Balay xb = x + 4*(*idx++); 748119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 749119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 750119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 751119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 752119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 753119a934aSSatish Balay v += 16; 754119a934aSSatish Balay } 755119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; 756119a934aSSatish Balay z += 4; 757119a934aSSatish Balay } 758119a934aSSatish Balay break; 759119a934aSSatish Balay case 5: 760119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 761119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 762119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 763119a934aSSatish Balay for ( j=0; j<n; j++ ) { 764119a934aSSatish Balay xb = x + 5*(*idx++); 765119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 766119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 767119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 768119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 769119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 770119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 771119a934aSSatish Balay v += 25; 772119a934aSSatish Balay } 773119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5; 774119a934aSSatish Balay z += 5; 775119a934aSSatish Balay } 776119a934aSSatish Balay break; 777119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 778119a934aSSatish Balay default: { 779119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 780119a934aSSatish Balay if (!a->mult_work) { 781119a934aSSatish Balay a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 782119a934aSSatish Balay CHKPTRQ(a->mult_work); 783119a934aSSatish Balay } 784119a934aSSatish Balay work = a->mult_work; 785119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 786119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 787119a934aSSatish Balay ncols = n*bs; 788119a934aSSatish Balay workt = work; 789119a934aSSatish Balay for ( j=0; j<n; j++ ) { 790119a934aSSatish Balay xb = x + bs*(*idx++); 791119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 792119a934aSSatish Balay workt += bs; 793119a934aSSatish Balay } 794119a934aSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 795119a934aSSatish Balay v += n*bs2; 796119a934aSSatish Balay z += bs; 797119a934aSSatish Balay } 798119a934aSSatish Balay } 799119a934aSSatish Balay } 800119a934aSSatish Balay PLogFlops(2*bs2*(a->nz)); 801119a934aSSatish Balay return 0; 802119a934aSSatish Balay } 803119a934aSSatish Balay 804119a934aSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 805119a934aSSatish Balay { 806119a934aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 807119a934aSSatish Balay Scalar *xg,*yg,*zg,*zb; 808119a934aSSatish Balay register Scalar *x,*y,*z,*v,*xb,x1,x2,x3,x4,x5; 809119a934aSSatish Balay int mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval; 810119a934aSSatish Balay int bs=a->bs,j,n,bs2=bs*bs,*ib; 811119a934aSSatish Balay 812119a934aSSatish Balay VecGetArray(xx,&xg); x = xg; 813119a934aSSatish Balay VecGetArray(yy,&yg); y = yg; 814119a934aSSatish Balay VecGetArray(zz,&zg); z = zg; 815119a934aSSatish Balay 816119a934aSSatish Balay if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); 817119a934aSSatish Balay else if (zz!=yy) PetscMemcpy(z,y,N*sizeof(Scalar)); 818119a934aSSatish Balay 819119a934aSSatish Balay idx = a->j; 820119a934aSSatish Balay v = a->a; 821119a934aSSatish Balay ii = a->i; 822119a934aSSatish Balay 823119a934aSSatish Balay switch (bs) { 824119a934aSSatish Balay case 1: 825119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 826119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 827119a934aSSatish Balay xb = x + i; x1 = xb[0]; 828119a934aSSatish Balay ib = idx + ai[i]; 829119a934aSSatish Balay for ( j=0; j<n; j++ ) { 830119a934aSSatish Balay z[ib[j]] += *v++ * x1; 831119a934aSSatish Balay } 832119a934aSSatish Balay } 833119a934aSSatish Balay break; 834119a934aSSatish Balay case 2: 835119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 836119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 837119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 838119a934aSSatish Balay ib = idx + ai[i]; 839119a934aSSatish Balay for ( j=0; j<n; j++ ) { 840119a934aSSatish Balay rval = ib[j]*2; 841119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 842119a934aSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 843119a934aSSatish Balay v += 4; 844119a934aSSatish Balay } 845119a934aSSatish Balay } 846119a934aSSatish Balay break; 847119a934aSSatish Balay case 3: 848119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 849119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 850119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 851119a934aSSatish Balay ib = idx + ai[i]; 852119a934aSSatish Balay for ( j=0; j<n; j++ ) { 853119a934aSSatish Balay rval = ib[j]*3; 854119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 855119a934aSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 856119a934aSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 857119a934aSSatish Balay v += 9; 858119a934aSSatish Balay } 859119a934aSSatish Balay } 860119a934aSSatish Balay break; 861119a934aSSatish Balay case 4: 862119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 863119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 864119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 865119a934aSSatish Balay ib = idx + ai[i]; 866119a934aSSatish Balay for ( j=0; j<n; j++ ) { 867119a934aSSatish Balay rval = ib[j]*4; 868119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 869119a934aSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 870119a934aSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 871119a934aSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 872119a934aSSatish Balay v += 16; 873119a934aSSatish Balay } 874119a934aSSatish Balay } 875119a934aSSatish Balay break; 876119a934aSSatish Balay case 5: 877119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 878119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 879119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 880119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 881119a934aSSatish Balay ib = idx + ai[i]; 882119a934aSSatish Balay for ( j=0; j<n; j++ ) { 883119a934aSSatish Balay rval = ib[j]*5; 884119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 885119a934aSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 886119a934aSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 887119a934aSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 888119a934aSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 889119a934aSSatish Balay v += 25; 890119a934aSSatish Balay } 891119a934aSSatish Balay } 892119a934aSSatish Balay break; 893119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 894119a934aSSatish Balay default: { 895119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 896119a934aSSatish Balay if (!a->mult_work) { 897119a934aSSatish Balay /* must be max of m,n */ 898119a934aSSatish Balay a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 899119a934aSSatish Balay CHKPTRQ(a->mult_work); 900119a934aSSatish Balay } 901119a934aSSatish Balay work = a->mult_work; 902119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 903119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 904119a934aSSatish Balay ncols = n*bs; 905119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 906119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 907119a934aSSatish Balay v += n*bs2; 908119a934aSSatish Balay x += bs; 909119a934aSSatish Balay workt = work; 910119a934aSSatish Balay for ( j=0; j<n; j++ ) { 911119a934aSSatish Balay zb = z + bs*(*idx++); 912119a934aSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 913119a934aSSatish Balay workt += bs; 914119a934aSSatish Balay } 915119a934aSSatish Balay } 916119a934aSSatish Balay } 917119a934aSSatish Balay } 918119a934aSSatish Balay PLogFlops(2*bs2*(a->nz)); 9192593348eSBarry Smith return 0; 9202593348eSBarry Smith } 9212593348eSBarry Smith 922de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 9232593348eSBarry Smith { 924b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 925bcd2baecSBarry Smith if (nz) *nz = a->bs*a->bs*a->nz; 926bcd2baecSBarry Smith if (nza) *nza = a->maxnz; 927bcd2baecSBarry Smith if (mem) *mem = (int)A->mem; 9282593348eSBarry Smith return 0; 9292593348eSBarry Smith } 9302593348eSBarry Smith 93191d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 93291d316f6SSatish Balay { 93391d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 93491d316f6SSatish Balay 93591d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 93691d316f6SSatish Balay 93791d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 93891d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 93991d316f6SSatish Balay (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 94091d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 94191d316f6SSatish Balay } 94291d316f6SSatish Balay 94391d316f6SSatish Balay /* if the a->i are the same */ 94491d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 94591d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 94691d316f6SSatish Balay } 94791d316f6SSatish Balay 94891d316f6SSatish Balay /* if a->j are the same */ 94991d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 95091d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 95191d316f6SSatish Balay } 95291d316f6SSatish Balay 95391d316f6SSatish Balay /* if a->a are the same */ 95491d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 95591d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 95691d316f6SSatish Balay } 95791d316f6SSatish Balay *flg = PETSC_TRUE; 95891d316f6SSatish Balay return 0; 95991d316f6SSatish Balay 96091d316f6SSatish Balay } 96191d316f6SSatish Balay 96291d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 96391d316f6SSatish Balay { 96491d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 9657e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 96617e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 96717e48fc4SSatish Balay 96817e48fc4SSatish Balay bs = a->bs; 96917e48fc4SSatish Balay aa = a->a; 97017e48fc4SSatish Balay ai = a->i; 97117e48fc4SSatish Balay aj = a->j; 97217e48fc4SSatish Balay ambs = a->mbs; 9737e67e3f9SSatish Balay bs2 = bs*bs; 97491d316f6SSatish Balay 97591d316f6SSatish Balay VecSet(&zero,v); 97691d316f6SSatish Balay VecGetArray(v,&x); VecGetLocalSize(v,&n); 9779867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 97817e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 97917e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 98017e48fc4SSatish Balay if (aj[j] == i) { 98117e48fc4SSatish Balay row = i*bs; 9827e67e3f9SSatish Balay aa_j = aa+j*bs2; 9837e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 98491d316f6SSatish Balay break; 98591d316f6SSatish Balay } 98691d316f6SSatish Balay } 98791d316f6SSatish Balay } 98891d316f6SSatish Balay return 0; 98991d316f6SSatish Balay } 99091d316f6SSatish Balay 9919867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 9929867e207SSatish Balay { 9939867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 9949867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 9957e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 9969867e207SSatish Balay 9979867e207SSatish Balay ai = a->i; 9989867e207SSatish Balay aj = a->j; 9999867e207SSatish Balay aa = a->a; 10009867e207SSatish Balay m = a->m; 10019867e207SSatish Balay n = a->n; 10029867e207SSatish Balay bs = a->bs; 10039867e207SSatish Balay mbs = a->mbs; 10047e67e3f9SSatish Balay bs2 = bs*bs; 10059867e207SSatish Balay if (ll) { 10069867e207SSatish Balay VecGetArray(ll,&l); VecGetSize(ll,&lm); 10079867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 10089867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 10099867e207SSatish Balay M = ai[i+1] - ai[i]; 10109867e207SSatish Balay li = l + i*bs; 10117e67e3f9SSatish Balay v = aa + bs2*ai[i]; 10129867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 10137e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 10149867e207SSatish Balay (*v++) *= li[k%bs]; 10159867e207SSatish Balay } 10169867e207SSatish Balay } 10179867e207SSatish Balay } 10189867e207SSatish Balay } 10199867e207SSatish Balay 10209867e207SSatish Balay if (rr) { 10219867e207SSatish Balay VecGetArray(rr,&r); VecGetSize(rr,&rn); 10229867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 10239867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 10249867e207SSatish Balay M = ai[i+1] - ai[i]; 10257e67e3f9SSatish Balay v = aa + bs2*ai[i]; 10269867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 10279867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 10289867e207SSatish Balay for ( k=0; k<bs; k++ ) { 10299867e207SSatish Balay x = ri[k]; 10309867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 10319867e207SSatish Balay } 10329867e207SSatish Balay } 10339867e207SSatish Balay } 10349867e207SSatish Balay } 10359867e207SSatish Balay return 0; 10369867e207SSatish Balay } 10379867e207SSatish Balay 10389867e207SSatish Balay 1039b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1040b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1041b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 10427fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 10437fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 10447fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 10457fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 10467fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 10477fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 10482593348eSBarry Smith 1049b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 10502593348eSBarry Smith { 1051b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 10522593348eSBarry Smith Scalar *v = a->a; 10532593348eSBarry Smith double sum = 0.0; 1054b6490206SBarry Smith int i; 10552593348eSBarry Smith 10562593348eSBarry Smith if (type == NORM_FROBENIUS) { 10572593348eSBarry Smith for (i=0; i<a->nz; i++ ) { 10582593348eSBarry Smith #if defined(PETSC_COMPLEX) 10592593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 10602593348eSBarry Smith #else 10612593348eSBarry Smith sum += (*v)*(*v); v++; 10622593348eSBarry Smith #endif 10632593348eSBarry Smith } 10642593348eSBarry Smith *norm = sqrt(sum); 10652593348eSBarry Smith } 10662593348eSBarry Smith else { 1067b6490206SBarry Smith SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 10682593348eSBarry Smith } 10692593348eSBarry Smith return 0; 10702593348eSBarry Smith } 10712593348eSBarry Smith 10722593348eSBarry Smith /* 10732593348eSBarry Smith note: This can only work for identity for row and col. It would 10742593348eSBarry Smith be good to check this and otherwise generate an error. 10752593348eSBarry Smith */ 1076b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 10772593348eSBarry Smith { 1078b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 10792593348eSBarry Smith Mat outA; 1080de6a44a3SBarry Smith int ierr; 10812593348eSBarry Smith 1082b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 10832593348eSBarry Smith 10842593348eSBarry Smith outA = inA; 10852593348eSBarry Smith inA->factor = FACTOR_LU; 10862593348eSBarry Smith a->row = row; 10872593348eSBarry Smith a->col = col; 10882593348eSBarry Smith 1089de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 10902593348eSBarry Smith 10912593348eSBarry Smith if (!a->diag) { 1092b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 10932593348eSBarry Smith } 10947fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 10952593348eSBarry Smith return 0; 10962593348eSBarry Smith } 10972593348eSBarry Smith 1098b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 10992593348eSBarry Smith { 1100b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1101b6490206SBarry Smith int one = 1, totalnz = a->bs*a->bs*a->nz; 1102b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1103b6490206SBarry Smith PLogFlops(totalnz); 11042593348eSBarry Smith return 0; 11052593348eSBarry Smith } 11062593348eSBarry Smith 11077e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 11087e67e3f9SSatish Balay { 11097e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 11107e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 11117e67e3f9SSatish Balay int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 11127e67e3f9SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 11137e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 11147e67e3f9SSatish Balay 11157e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 11167e67e3f9SSatish Balay row = im[k]; brow = row/bs; 11177e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 11187e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 11197e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 11207e67e3f9SSatish Balay nrow = ailen[brow]; 11217e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 11227e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 11237e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 11247e67e3f9SSatish Balay col = in[l] - shift; 11257e67e3f9SSatish Balay bcol = col/bs; 11267e67e3f9SSatish Balay cidx = col%bs; 11277e67e3f9SSatish Balay ridx = row%bs; 11287e67e3f9SSatish Balay high = nrow; 11297e67e3f9SSatish Balay low = 0; /* assume unsorted */ 11307e67e3f9SSatish Balay while (high-low > 5) { 11317e67e3f9SSatish Balay t = (low+high)/2; 11327e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 11337e67e3f9SSatish Balay else low = t; 11347e67e3f9SSatish Balay } 11357e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 11367e67e3f9SSatish Balay if (rp[i] > bcol) break; 11377e67e3f9SSatish Balay if (rp[i] == bcol) { 11387e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 11397e67e3f9SSatish Balay goto finished; 11407e67e3f9SSatish Balay } 11417e67e3f9SSatish Balay } 11427e67e3f9SSatish Balay *v++ = zero; 11437e67e3f9SSatish Balay finished:; 11447e67e3f9SSatish Balay } 11457e67e3f9SSatish Balay } 11467e67e3f9SSatish Balay return 0; 11477e67e3f9SSatish Balay } 11487e67e3f9SSatish Balay 1149b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A) 11502593348eSBarry Smith { 11512593348eSBarry Smith static int called = 0; 11522593348eSBarry Smith 11532593348eSBarry Smith if (called) return 0; else called = 1; 11542593348eSBarry Smith return 0; 11552593348eSBarry Smith } 11562593348eSBarry Smith /* -------------------------------------------------------------------*/ 1157cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 11589867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1159119a934aSSatish Balay MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ, 1160119a934aSSatish Balay 0,MatMultTransAdd_SeqBAIJ, 1161de6a44a3SBarry Smith MatSolve_SeqBAIJ,0, 1162b6490206SBarry Smith 0,0, 1163de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1164b6490206SBarry Smith 0, 1165f2501298SSatish Balay MatTranspose_SeqBAIJ, 116617e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 11679867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1168584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1169b6490206SBarry Smith 0, 1170b6490206SBarry Smith MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1171b6490206SBarry Smith MatGetReordering_SeqBAIJ, 11727fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1173b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1174de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1175b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 1176b6490206SBarry Smith 0,0, 1177b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1178b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1179b6490206SBarry Smith 0,0, 11807e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 1181b6490206SBarry Smith MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1182b6490206SBarry Smith 0}; 11832593348eSBarry Smith 11842593348eSBarry Smith /*@C 118544cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 118644cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 118744cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 11882593348eSBarry Smith (or nzz). By setting these parameters accurately, performance can be 11892593348eSBarry Smith increased by more than a factor of 50. 11902593348eSBarry Smith 11912593348eSBarry Smith Input Parameters: 11922593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1193b6490206SBarry Smith . bs - size of block 11942593348eSBarry Smith . m - number of rows 11952593348eSBarry Smith . n - number of columns 1196b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1197b6490206SBarry Smith . nzz - number of block nonzeros per block row or PETSC_NULL 1198b6490206SBarry Smith (possibly different for each row) 11992593348eSBarry Smith 12002593348eSBarry Smith Output Parameter: 12012593348eSBarry Smith . A - the matrix 12022593348eSBarry Smith 12032593348eSBarry Smith Notes: 120444cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 12052593348eSBarry Smith storage. That is, the stored row and column indices can begin at 120644cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 12072593348eSBarry Smith 12082593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 12092593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 12102593348eSBarry Smith allocation. For additional details, see the users manual chapter on 12112593348eSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 12122593348eSBarry Smith 121344cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 12142593348eSBarry Smith @*/ 1215b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 12162593348eSBarry Smith { 12172593348eSBarry Smith Mat B; 1218b6490206SBarry Smith Mat_SeqBAIJ *b; 1219f2501298SSatish Balay int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1220b6490206SBarry Smith 1221f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1222f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 12232593348eSBarry Smith 12242593348eSBarry Smith *A = 0; 1225b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 12262593348eSBarry Smith PLogObjectCreate(B); 1227b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 122844cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 12292593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 12307fc0212eSBarry Smith switch (bs) { 12317fc0212eSBarry Smith case 1: 12327fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 12337fc0212eSBarry Smith break; 12344eeb42bcSBarry Smith case 2: 12354eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 12366d84be18SBarry Smith break; 1237f327dd97SBarry Smith case 3: 1238f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 12394eeb42bcSBarry Smith break; 12406d84be18SBarry Smith case 4: 12416d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 12426d84be18SBarry Smith break; 12436d84be18SBarry Smith case 5: 12446d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 12456d84be18SBarry Smith break; 12467fc0212eSBarry Smith } 1247b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1248b6490206SBarry Smith B->view = MatView_SeqBAIJ; 12492593348eSBarry Smith B->factor = 0; 12502593348eSBarry Smith B->lupivotthreshold = 1.0; 12512593348eSBarry Smith b->row = 0; 12522593348eSBarry Smith b->col = 0; 12532593348eSBarry Smith b->reallocs = 0; 12542593348eSBarry Smith 125544cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 125644cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1257b6490206SBarry Smith b->mbs = mbs; 1258f2501298SSatish Balay b->nbs = nbs; 1259b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 12602593348eSBarry Smith if (nnz == PETSC_NULL) { 1261119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 12622593348eSBarry Smith else if (nz <= 0) nz = 1; 1263b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1264b6490206SBarry Smith nz = nz*mbs; 12652593348eSBarry Smith } 12662593348eSBarry Smith else { 12672593348eSBarry Smith nz = 0; 1268b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 12692593348eSBarry Smith } 12702593348eSBarry Smith 12712593348eSBarry Smith /* allocate the matrix space */ 12727e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 12732593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 12747e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 12757e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 12762593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 12772593348eSBarry Smith b->i = b->j + nz; 12782593348eSBarry Smith b->singlemalloc = 1; 12792593348eSBarry Smith 1280de6a44a3SBarry Smith b->i[0] = 0; 1281b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 12822593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 12832593348eSBarry Smith } 12842593348eSBarry Smith 1285b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1286b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1287b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1288b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 12892593348eSBarry Smith 1290b6490206SBarry Smith b->bs = bs; 1291b6490206SBarry Smith b->mbs = mbs; 12922593348eSBarry Smith b->nz = 0; 12932593348eSBarry Smith b->maxnz = nz; 12942593348eSBarry Smith b->sorted = 0; 12952593348eSBarry Smith b->roworiented = 1; 12962593348eSBarry Smith b->nonew = 0; 12972593348eSBarry Smith b->diag = 0; 12982593348eSBarry Smith b->solve_work = 0; 1299de6a44a3SBarry Smith b->mult_work = 0; 13002593348eSBarry Smith b->spptr = 0; 13011073c447SSatish Balay b->indexshift = 0; 13022593348eSBarry Smith *A = B; 13032593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 13042593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 13052593348eSBarry Smith return 0; 13062593348eSBarry Smith } 13072593348eSBarry Smith 1308b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 13092593348eSBarry Smith { 13102593348eSBarry Smith Mat C; 1311b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 13127e67e3f9SSatish Balay int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1313de6a44a3SBarry Smith 1314de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 13152593348eSBarry Smith 13162593348eSBarry Smith *B = 0; 1317b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 13182593348eSBarry Smith PLogObjectCreate(C); 1319b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 13202593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1321b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1322b6490206SBarry Smith C->view = MatView_SeqBAIJ; 13232593348eSBarry Smith C->factor = A->factor; 13242593348eSBarry Smith c->row = 0; 13252593348eSBarry Smith c->col = 0; 13262593348eSBarry Smith C->assembled = PETSC_TRUE; 13272593348eSBarry Smith 132844cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 132944cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 133044cd7ae7SLois Curfman McInnes C->M = a->m; 133144cd7ae7SLois Curfman McInnes C->N = a->n; 133244cd7ae7SLois Curfman McInnes 1333b6490206SBarry Smith c->bs = a->bs; 1334b6490206SBarry Smith c->mbs = a->mbs; 1335b6490206SBarry Smith c->nbs = a->nbs; 13361073c447SSatish Balay c->indexshift = 0; 13372593348eSBarry Smith 1338b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1339b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1340b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 13412593348eSBarry Smith c->imax[i] = a->imax[i]; 13422593348eSBarry Smith c->ilen[i] = a->ilen[i]; 13432593348eSBarry Smith } 13442593348eSBarry Smith 13452593348eSBarry Smith /* allocate the matrix space */ 13462593348eSBarry Smith c->singlemalloc = 1; 13477e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 13482593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 13497e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1350de6a44a3SBarry Smith c->i = c->j + nz; 1351b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1352b6490206SBarry Smith if (mbs > 0) { 1353de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 13542593348eSBarry Smith if (cpvalues == COPY_VALUES) { 13557e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 13562593348eSBarry Smith } 13572593348eSBarry Smith } 13582593348eSBarry Smith 1359b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 13602593348eSBarry Smith c->sorted = a->sorted; 13612593348eSBarry Smith c->roworiented = a->roworiented; 13622593348eSBarry Smith c->nonew = a->nonew; 13632593348eSBarry Smith 13642593348eSBarry Smith if (a->diag) { 1365b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1366b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1367b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 13682593348eSBarry Smith c->diag[i] = a->diag[i]; 13692593348eSBarry Smith } 13702593348eSBarry Smith } 13712593348eSBarry Smith else c->diag = 0; 13722593348eSBarry Smith c->nz = a->nz; 13732593348eSBarry Smith c->maxnz = a->maxnz; 13742593348eSBarry Smith c->solve_work = 0; 13752593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 13767fc0212eSBarry Smith c->mult_work = 0; 13772593348eSBarry Smith *B = C; 13782593348eSBarry Smith return 0; 13792593348eSBarry Smith } 13802593348eSBarry Smith 138119bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 13822593348eSBarry Smith { 1383b6490206SBarry Smith Mat_SeqBAIJ *a; 13842593348eSBarry Smith Mat B; 1385de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1386b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 138735aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1388de6a44a3SBarry Smith int *masked, nmask,tmp,ishift,bs2; 1389b6490206SBarry Smith Scalar *aa; 139019bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 13912593348eSBarry Smith 139235aab85fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1393de6a44a3SBarry Smith bs2 = bs*bs; 1394b6490206SBarry Smith 13952593348eSBarry Smith MPI_Comm_size(comm,&size); 1396b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 139790ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 139877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1399de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 14002593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 14012593348eSBarry Smith 1402b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 140335aab85fSBarry Smith 140435aab85fSBarry Smith /* 140535aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 140635aab85fSBarry Smith divisible by the blocksize 140735aab85fSBarry Smith */ 1408b6490206SBarry Smith mbs = M/bs; 140935aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 141035aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 141135aab85fSBarry Smith else mbs++; 141235aab85fSBarry Smith if (extra_rows) { 141335aab85fSBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 141435aab85fSBarry Smith } 1415b6490206SBarry Smith 14162593348eSBarry Smith /* read in row lengths */ 141735aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 141877c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 141935aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 14202593348eSBarry Smith 1421b6490206SBarry Smith /* read in column indices */ 142235aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 142377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 142435aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1425b6490206SBarry Smith 1426b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1427b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1428b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 142935aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 143035aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 143135aab85fSBarry Smith masked = mask + mbs; 1432b6490206SBarry Smith rowcount = 0; nzcount = 0; 1433b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 143435aab85fSBarry Smith nmask = 0; 1435b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1436b6490206SBarry Smith kmax = rowlengths[rowcount]; 1437b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 143835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 143935aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1440b6490206SBarry Smith } 1441b6490206SBarry Smith rowcount++; 1442b6490206SBarry Smith } 144335aab85fSBarry Smith browlengths[i] += nmask; 144435aab85fSBarry Smith /* zero out the mask elements we set */ 144535aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1446b6490206SBarry Smith } 1447b6490206SBarry Smith 14482593348eSBarry Smith /* create our matrix */ 144935aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 145035aab85fSBarry Smith CHKERRQ(ierr); 14512593348eSBarry Smith B = *A; 1452b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 14532593348eSBarry Smith 14542593348eSBarry Smith /* set matrix "i" values */ 1455de6a44a3SBarry Smith a->i[0] = 0; 1456b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1457b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1458b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 14592593348eSBarry Smith } 14609867e207SSatish Balay a->indexshift = 0; 1461b6490206SBarry Smith a->nz = 0; 1462b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 14632593348eSBarry Smith 1464b6490206SBarry Smith /* read in nonzero values */ 146535aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 146677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 146735aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1468b6490206SBarry Smith 1469b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1470b6490206SBarry Smith nzcount = 0; jcount = 0; 1471b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 1472b6490206SBarry Smith nzcountb = nzcount; 147335aab85fSBarry Smith nmask = 0; 1474b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1475b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1476b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 147735aab85fSBarry Smith tmp = jj[nzcount++]/bs; 147835aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1479b6490206SBarry Smith } 1480b6490206SBarry Smith rowcount++; 1481b6490206SBarry Smith } 1482de6a44a3SBarry Smith /* sort the masked values */ 148377c4ece6SBarry Smith PetscSortInt(nmask,masked); 1484de6a44a3SBarry Smith 1485b6490206SBarry Smith /* set "j" values into matrix */ 1486b6490206SBarry Smith maskcount = 1; 148735aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 148835aab85fSBarry Smith a->j[jcount++] = masked[j]; 1489de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1490b6490206SBarry Smith } 1491b6490206SBarry Smith /* set "a" values into matrix */ 1492de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1493b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1494b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1495b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 1496de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1497de6a44a3SBarry Smith block = mask[tmp] - 1; 1498de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1499de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1500b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 1501b6490206SBarry Smith } 1502b6490206SBarry Smith } 150335aab85fSBarry Smith /* zero out the mask elements we set */ 150435aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1505b6490206SBarry Smith } 150635aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1507b6490206SBarry Smith 1508b6490206SBarry Smith PetscFree(rowlengths); 1509b6490206SBarry Smith PetscFree(browlengths); 1510b6490206SBarry Smith PetscFree(aa); 1511b6490206SBarry Smith PetscFree(jj); 1512b6490206SBarry Smith PetscFree(mask); 1513b6490206SBarry Smith 1514b6490206SBarry Smith B->assembled = PETSC_TRUE; 1515de6a44a3SBarry Smith 1516de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1517de6a44a3SBarry Smith if (flg) { 151819bcc07fSBarry Smith Viewer tviewer; 151919bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 152090ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 152119bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 152219bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1523de6a44a3SBarry Smith } 1524de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1525de6a44a3SBarry Smith if (flg) { 152619bcc07fSBarry Smith Viewer tviewer; 152719bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 152890ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 152919bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 153019bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1531de6a44a3SBarry Smith } 1532de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1533de6a44a3SBarry Smith if (flg) { 153419bcc07fSBarry Smith Viewer tviewer; 153519bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 153619bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 153719bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1538de6a44a3SBarry Smith } 1539de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1540de6a44a3SBarry Smith if (flg) { 154119bcc07fSBarry Smith Viewer tviewer; 154219bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 154390ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 154419bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 154519bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1546de6a44a3SBarry Smith } 1547de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1548de6a44a3SBarry Smith if (flg) { 154919bcc07fSBarry Smith Viewer tviewer; 155019bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 155119bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 155219bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 155319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1554de6a44a3SBarry Smith } 15552593348eSBarry Smith return 0; 15562593348eSBarry Smith } 15572593348eSBarry Smith 15582593348eSBarry Smith 15592593348eSBarry Smith 1560