1b6490206SBarry Smith 22593348eSBarry Smith #ifndef lint 3*44cd7ae7SLois Curfman McInnes static char vcid[] = "$Id: baij.c,v 1.29 1996/04/06 00:00:43 balay Exp curfman $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 10b6490206SBarry Smith #include "baij.h" 112593348eSBarry Smith #include "vec/vecimpl.h" 122593348eSBarry Smith #include "inline/spops.h" 132593348eSBarry Smith #include "petsc.h" 142593348eSBarry Smith 15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 162593348eSBarry Smith 17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 182593348eSBarry Smith { 19b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20bcd2baecSBarry Smith int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 212593348eSBarry Smith 222593348eSBarry Smith /* 232593348eSBarry Smith this is tacky: In the future when we have written special factorization 242593348eSBarry Smith and solve routines for the identity permutation we should use a 252593348eSBarry Smith stride index set instead of the general one. 262593348eSBarry Smith */ 272593348eSBarry Smith if (type == ORDER_NATURAL) { 282593348eSBarry Smith idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 292593348eSBarry Smith for ( i=0; i<n; i++ ) idx[i] = i; 302593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 312593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 322593348eSBarry Smith PetscFree(idx); 332593348eSBarry Smith ISSetPermutation(*rperm); 342593348eSBarry Smith ISSetPermutation(*cperm); 352593348eSBarry Smith ISSetIdentity(*rperm); 362593348eSBarry Smith ISSetIdentity(*cperm); 372593348eSBarry Smith return 0; 382593348eSBarry Smith } 392593348eSBarry Smith 40bcd2baecSBarry Smith MatReorderingRegisterAll(); 41bcd2baecSBarry Smith ishift = a->indexshift; 42bcd2baecSBarry Smith oshift = -MatReorderingIndexShift[(int)type]; 43bcd2baecSBarry Smith if (MatReorderingRequiresSymmetric[(int)type]) { 44bcd2baecSBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45bcd2baecSBarry Smith CHKERRQ(ierr); 46bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 472593348eSBarry Smith PetscFree(ia); PetscFree(ja); 48bcd2baecSBarry Smith } else { 49bcd2baecSBarry Smith if (ishift == oshift) { 50bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51bcd2baecSBarry Smith } 52bcd2baecSBarry Smith else if (ishift == -1) { 53bcd2baecSBarry Smith /* temporarily subtract 1 from i and j indices */ 54bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 55bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 56bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 59bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60bcd2baecSBarry Smith } else { 61bcd2baecSBarry Smith /* temporarily add 1 to i and j indices */ 62bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 63bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 64bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 67bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68bcd2baecSBarry Smith } 69bcd2baecSBarry Smith } 702593348eSBarry Smith return 0; 712593348eSBarry Smith } 722593348eSBarry Smith 73de6a44a3SBarry Smith /* 74de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 75de6a44a3SBarry Smith */ 76de6a44a3SBarry Smith 77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 78de6a44a3SBarry Smith { 79de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 807fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 81de6a44a3SBarry Smith 82de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 847fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 85de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86de6a44a3SBarry Smith if (a->j[j] == i) { 87de6a44a3SBarry Smith diag[i] = j; 88de6a44a3SBarry Smith break; 89de6a44a3SBarry Smith } 90de6a44a3SBarry Smith } 91de6a44a3SBarry Smith } 92de6a44a3SBarry Smith a->diag = diag; 93de6a44a3SBarry Smith return 0; 94de6a44a3SBarry Smith } 952593348eSBarry Smith 962593348eSBarry Smith #include "draw.h" 972593348eSBarry Smith #include "pinclude/pviewer.h" 9877c4ece6SBarry Smith #include "sys.h" 992593348eSBarry Smith 100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 1012593348eSBarry Smith { 102b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1037e67e3f9SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=bs*bs; 104b6490206SBarry Smith Scalar *aa; 1052593348eSBarry Smith 10690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1072593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1082593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1092593348eSBarry Smith col_lens[1] = a->m; 1102593348eSBarry Smith col_lens[2] = a->n; 1117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1122593348eSBarry Smith 1132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 114b6490206SBarry Smith count = 0; 115b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 116b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 117b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118b6490206SBarry Smith } 1192593348eSBarry Smith } 12077c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1212593348eSBarry Smith PetscFree(col_lens); 1222593348eSBarry Smith 1232593348eSBarry Smith /* store column indices (zero start index) */ 1247e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125b6490206SBarry Smith count = 0; 126b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 127b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 128b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 130b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1312593348eSBarry Smith } 1322593348eSBarry Smith } 133b6490206SBarry Smith } 134b6490206SBarry Smith } 1357e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136b6490206SBarry Smith PetscFree(jj); 1372593348eSBarry Smith 1382593348eSBarry Smith /* store nonzero values */ 1397e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140b6490206SBarry Smith count = 0; 141b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 142b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 143b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 146b6490206SBarry Smith } 147b6490206SBarry Smith } 148b6490206SBarry Smith } 149b6490206SBarry Smith } 1507e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151b6490206SBarry Smith PetscFree(aa); 1522593348eSBarry Smith return 0; 1532593348eSBarry Smith } 1542593348eSBarry Smith 155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1562593348eSBarry Smith { 157b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1587e67e3f9SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=bs*bs; 1592593348eSBarry Smith FILE *fd; 1602593348eSBarry Smith char *outputname; 1612593348eSBarry Smith 16290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1632593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 16590ace30eSBarry Smith if (format == ASCII_FORMAT_INFO) { 1662593348eSBarry Smith /* no need to print additional information */ ; 1672593348eSBarry Smith } 16890ace30eSBarry Smith else if (format == ASCII_FORMAT_MATLAB) { 169b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1702593348eSBarry Smith } 171*44cd7ae7SLois Curfman McInnes else if (format == ASCII_FORMAT_COMMON) { 172*44cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 173*44cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 174*44cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 175*44cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176*44cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 177*44cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 178*44cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179*44cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180*44cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181*44cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182*44cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183*44cd7ae7SLois Curfman McInnes #else 184*44cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 185*44cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186*44cd7ae7SLois Curfman McInnes #endif 187*44cd7ae7SLois Curfman McInnes } 188*44cd7ae7SLois Curfman McInnes } 189*44cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 190*44cd7ae7SLois Curfman McInnes } 191*44cd7ae7SLois Curfman McInnes } 192*44cd7ae7SLois Curfman McInnes } 1932593348eSBarry Smith else { 194b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 195b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 196b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 197b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 2007e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 20188685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2027e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 20488685aaeSLois Curfman McInnes else { 2057e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20688685aaeSLois Curfman McInnes } 20788685aaeSLois Curfman McInnes #else 2087e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20988685aaeSLois Curfman McInnes #endif 2102593348eSBarry Smith } 2112593348eSBarry Smith } 2122593348eSBarry Smith fprintf(fd,"\n"); 2132593348eSBarry Smith } 2142593348eSBarry Smith } 215b6490206SBarry Smith } 2162593348eSBarry Smith fflush(fd); 2172593348eSBarry Smith return 0; 2182593348eSBarry Smith } 2192593348eSBarry Smith 220b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 2212593348eSBarry Smith { 2222593348eSBarry Smith Mat A = (Mat) obj; 22319bcc07fSBarry Smith ViewerType vtype; 22419bcc07fSBarry Smith int ierr; 2252593348eSBarry Smith 2262593348eSBarry Smith if (!viewer) { 22719bcc07fSBarry Smith viewer = STDOUT_VIEWER_SELF; 2282593348eSBarry Smith } 22919bcc07fSBarry Smith 23019bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 23119bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 232b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 2332593348eSBarry Smith } 23419bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 2362593348eSBarry Smith } 23719bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 238b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 2392593348eSBarry Smith } 24019bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 241b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 2422593348eSBarry Smith } 2432593348eSBarry Smith return 0; 2442593348eSBarry Smith } 245b6490206SBarry Smith 2467e67e3f9SSatish Balay #define CHUNKSIZE 1 247cd0e1443SSatish Balay 248cd0e1443SSatish Balay /* This version has row oriented v */ 249cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250cd0e1443SSatish Balay { 251cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254cd0e1443SSatish Balay int *aj=a->j,nonew=a->nonew,shift=a->indexshift,bs=a->bs,brow,bcol; 2557e67e3f9SSatish Balay int ridx,cidx,bs2=bs*bs; 256cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 257cd0e1443SSatish Balay 258cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 259cd0e1443SSatish Balay row = im[k]; brow = row/bs; 260cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 2627e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 263cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 264cd0e1443SSatish Balay low = 0; 265cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 266cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268cd0e1443SSatish Balay col = in[l] - shift; bcol = col/bs; 269cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 270cd0e1443SSatish Balay if (roworiented) { 271cd0e1443SSatish Balay value = *v++; 272cd0e1443SSatish Balay } 273cd0e1443SSatish Balay else { 274cd0e1443SSatish Balay value = v[k + l*m]; 275cd0e1443SSatish Balay } 276cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 277cd0e1443SSatish Balay while (high-low > 5) { 278cd0e1443SSatish Balay t = (low+high)/2; 279cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 280cd0e1443SSatish Balay else low = t; 281cd0e1443SSatish Balay } 282cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 283cd0e1443SSatish Balay if (rp[i] > bcol) break; 284cd0e1443SSatish Balay if (rp[i] == bcol) { 2857e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 286cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 287cd0e1443SSatish Balay else *bap = value; 288cd0e1443SSatish Balay goto noinsert; 289cd0e1443SSatish Balay } 290cd0e1443SSatish Balay } 291cd0e1443SSatish Balay if (nonew) goto noinsert; 292cd0e1443SSatish Balay if (nrow >= rmax) { 293cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 294cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295cd0e1443SSatish Balay Scalar *new_a; 296cd0e1443SSatish Balay 297cd0e1443SSatish Balay /* malloc new storage space */ 2987e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 3007e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 301cd0e1443SSatish Balay new_i = new_j + new_nz; 302cd0e1443SSatish Balay 303cd0e1443SSatish Balay /* copy over old data into new slots */ 304cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 3057e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 3067e67e3f9SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow+shift)*sizeof(int)); 307cd0e1443SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow - shift); 308cd0e1443SSatish Balay PetscMemcpy(new_j+ai[brow]+shift+nrow+CHUNKSIZE,aj+ai[brow]+shift+nrow, 309cd0e1443SSatish Balay len*sizeof(int)); 3107e67e3f9SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow+shift)*bs2*sizeof(Scalar)); 3117e67e3f9SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+shift+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 3127e67e3f9SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+shift+nrow+CHUNKSIZE), 3137e67e3f9SSatish Balay aa+bs2*(ai[brow]+shift+nrow),bs2*len*sizeof(Scalar)); 314cd0e1443SSatish Balay /* free up old matrix storage */ 315cd0e1443SSatish Balay PetscFree(a->a); 316cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318cd0e1443SSatish Balay a->singlemalloc = 1; 319cd0e1443SSatish Balay 3207e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 321cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 3227e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 3237e67e3f9SSatish Balay a->maxnz += bs2*CHUNKSIZE; 324cd0e1443SSatish Balay a->reallocs++; 3257e67e3f9SSatish Balay a->nz+=bs2; 326cd0e1443SSatish Balay } 3277e67e3f9SSatish Balay N = nrow++ - 1; 328cd0e1443SSatish Balay /* shift up all the later entries in this row */ 329cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 330cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 3317e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332cd0e1443SSatish Balay } 3337e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334cd0e1443SSatish Balay rp[i] = bcol; 3357e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 336cd0e1443SSatish Balay noinsert:; 337cd0e1443SSatish Balay low = i; 338cd0e1443SSatish Balay } 339cd0e1443SSatish Balay ailen[brow] = nrow; 340cd0e1443SSatish Balay } 341cd0e1443SSatish Balay return 0; 342cd0e1443SSatish Balay } 343cd0e1443SSatish Balay 3440b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 3450b824a48SSatish Balay { 3460b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3470b824a48SSatish Balay *m = a->m; *n = a->n; 3480b824a48SSatish Balay return 0; 3490b824a48SSatish Balay } 3500b824a48SSatish Balay 3510b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 3520b824a48SSatish Balay { 3530b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3540b824a48SSatish Balay *m = 0; *n = a->m; 3550b824a48SSatish Balay return 0; 3560b824a48SSatish Balay } 3570b824a48SSatish Balay 3589867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 3599867e207SSatish Balay { 3609867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3617e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 3629867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 3639867e207SSatish Balay 3649867e207SSatish Balay bs = a->bs; 3659867e207SSatish Balay ai = a->i; 3669867e207SSatish Balay aj = a->j; 3679867e207SSatish Balay aa = a->a; 3687e67e3f9SSatish Balay bs2 = bs*bs; 3699867e207SSatish Balay 3709867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 3719867e207SSatish Balay 3729867e207SSatish Balay bn = row/bs; /* Block number */ 3739867e207SSatish Balay bp = row % bs; /* Block Position */ 3749867e207SSatish Balay M = ai[bn+1] - ai[bn]; 3759867e207SSatish Balay *nz = bs*M; 3769867e207SSatish Balay 3779867e207SSatish Balay if (v) { 3789867e207SSatish Balay *v = 0; 3799867e207SSatish Balay if (*nz) { 3809867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 3819867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3829867e207SSatish Balay v_i = *v + i*bs; 3837e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 3847e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 3859867e207SSatish Balay } 3869867e207SSatish Balay } 3879867e207SSatish Balay } 3889867e207SSatish Balay 3899867e207SSatish Balay if (idx) { 3909867e207SSatish Balay *idx = 0; 3919867e207SSatish Balay if (*nz) { 3929867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 3939867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3949867e207SSatish Balay idx_i = *idx + i*bs; 3959867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 3969867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 3979867e207SSatish Balay } 3989867e207SSatish Balay } 3999867e207SSatish Balay } 4009867e207SSatish Balay return 0; 4019867e207SSatish Balay } 4029867e207SSatish Balay 4039867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4049867e207SSatish Balay { 4059867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 4069867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 4079867e207SSatish Balay return 0; 4089867e207SSatish Balay } 409b6490206SBarry Smith 410f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411f2501298SSatish Balay { 412f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413f2501298SSatish Balay Mat C; 414f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415f2501298SSatish Balay int shift=a->indexshift,*rows,*cols,bs2=bs*bs; 416f2501298SSatish Balay Scalar *array=a->a; 417f2501298SSatish Balay 418f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 419f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 422f2501298SSatish Balay if (shift) { 423f2501298SSatish Balay for ( i=0; i<ai[mbs]-1; i++ ) aj[i] -= 1; 424f2501298SSatish Balay } 425f2501298SSatish Balay for ( i=0; i<ai[mbs]+shift; i++ ) col[aj[i]] += 1; 426f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 427f2501298SSatish Balay PetscFree(col); 428f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 429f2501298SSatish Balay cols = rows + bs; 430f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 431f2501298SSatish Balay cols[0] = i*bs; 432f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 433f2501298SSatish Balay len = ai[i+1] - ai[i]; 434f2501298SSatish Balay for ( j=0; j<len; j++ ) { 435f2501298SSatish Balay rows[0] = (*aj++)*bs; 436f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 437f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 438f2501298SSatish Balay array += bs2; 439f2501298SSatish Balay } 440f2501298SSatish Balay } 4411073c447SSatish Balay PetscFree(rows); 442f2501298SSatish Balay if (shift) { 443f2501298SSatish Balay for ( i=0; i<ai[mbs]-1; i++ ) aj[i] += 1; 444f2501298SSatish Balay } 445f2501298SSatish Balay 446f2501298SSatish Balay ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 447f2501298SSatish Balay ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 448f2501298SSatish Balay 449f2501298SSatish Balay if (B != PETSC_NULL) { 450f2501298SSatish Balay *B = C; 451f2501298SSatish Balay } else { 452f2501298SSatish Balay /* This isn't really an in-place transpose */ 453f2501298SSatish Balay PetscFree(a->a); 454f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 455f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 456f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 457f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 458f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 459f2501298SSatish Balay PetscFree(a); 460f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 461f2501298SSatish Balay PetscHeaderDestroy(C); 462f2501298SSatish Balay } 463f2501298SSatish Balay return 0; 464f2501298SSatish Balay } 465f2501298SSatish Balay 466f2501298SSatish Balay 467584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 468584200bdSSatish Balay { 469584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 470584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 471584200bdSSatish Balay int m = a->m, *ip, N, *ailen = a->ilen,shift = a->indexshift; 472584200bdSSatish Balay int bs = a->bs, mbs = a->mbs, bs2 = bs*bs; 473584200bdSSatish Balay Scalar *aa = a->a, *ap; 474584200bdSSatish Balay 475584200bdSSatish Balay if (mode == FLUSH_ASSEMBLY) return 0; 476584200bdSSatish Balay 477584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 478584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 479584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 480584200bdSSatish Balay if (fshift) { 481584200bdSSatish Balay ip = aj + ai[i] + shift; ap = aa + bs2*ai[i] + shift; 482584200bdSSatish Balay N = ailen[i]; 483584200bdSSatish Balay for ( j=0; j<N; j++ ) { 484584200bdSSatish Balay ip[j-fshift] = ip[j]; 4857e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 486584200bdSSatish Balay } 487584200bdSSatish Balay } 488584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 489584200bdSSatish Balay } 490584200bdSSatish Balay if (mbs) { 491584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 492584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 493584200bdSSatish Balay } 494584200bdSSatish Balay /* reset ilen and imax for each row */ 495584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 496584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 497584200bdSSatish Balay } 498584200bdSSatish Balay a->nz = (ai[m] + shift)*bs2; 499584200bdSSatish Balay 500584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 501584200bdSSatish Balay if (fshift && a->diag) { 502584200bdSSatish Balay PetscFree(a->diag); 503584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 504584200bdSSatish Balay a->diag = 0; 505584200bdSSatish Balay } 506584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneeded storage space %d used %d rows %d\n", 507584200bdSSatish Balay fshift,a->nz,m); 508584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 509584200bdSSatish Balay a->reallocs); 510584200bdSSatish Balay return 0; 511584200bdSSatish Balay } 512584200bdSSatish Balay 513b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 5142593348eSBarry Smith { 515b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 516de6a44a3SBarry Smith PetscMemzero(a->a,a->bs*a->bs*a->i[a->mbs]*sizeof(Scalar)); 5172593348eSBarry Smith return 0; 5182593348eSBarry Smith } 5192593348eSBarry Smith 520b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 5212593348eSBarry Smith { 5222593348eSBarry Smith Mat A = (Mat) obj; 523b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5242593348eSBarry Smith 5252593348eSBarry Smith #if defined(PETSC_LOG) 5262593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 5272593348eSBarry Smith #endif 5282593348eSBarry Smith PetscFree(a->a); 5292593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 5302593348eSBarry Smith if (a->diag) PetscFree(a->diag); 5312593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 5322593348eSBarry Smith if (a->imax) PetscFree(a->imax); 5332593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 534de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 5352593348eSBarry Smith PetscFree(a); 536f2655603SLois Curfman McInnes PLogObjectDestroy(A); 537f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 5382593348eSBarry Smith return 0; 5392593348eSBarry Smith } 5402593348eSBarry Smith 541b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 5422593348eSBarry Smith { 543b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5442593348eSBarry Smith if (op == ROW_ORIENTED) a->roworiented = 1; 5452593348eSBarry Smith else if (op == COLUMN_ORIENTED) a->roworiented = 0; 5462593348eSBarry Smith else if (op == COLUMNS_SORTED) a->sorted = 1; 5472593348eSBarry Smith else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 5482593348eSBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 5492593348eSBarry Smith else if (op == ROWS_SORTED || 5502593348eSBarry Smith op == SYMMETRIC_MATRIX || 5512593348eSBarry Smith op == STRUCTURALLY_SYMMETRIC_MATRIX || 5522593348eSBarry Smith op == YES_NEW_DIAGONALS) 55394a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 5542593348eSBarry Smith else if (op == NO_NEW_DIAGONALS) 555b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 5562593348eSBarry Smith else 557b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 5582593348eSBarry Smith return 0; 5592593348eSBarry Smith } 5602593348eSBarry Smith 5612593348eSBarry Smith 5622593348eSBarry Smith /* -------------------------------------------------------*/ 5632593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 5642593348eSBarry Smith /* -------------------------------------------------------*/ 565b6490206SBarry Smith #include "pinclude/plapack.h" 566b6490206SBarry Smith 567b6490206SBarry Smith static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec yy) 5682593348eSBarry Smith { 569b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 570b6490206SBarry Smith Scalar *xg,*yg; 571b6490206SBarry Smith register Scalar *x, *y, *v, sum,*xb, sum1,sum2,sum3,sum4,sum5; 572b6490206SBarry Smith register Scalar x1,x2,x3,x4,x5; 573b6490206SBarry Smith int mbs = a->mbs, m = a->m, i, *idx,*ii; 574b6490206SBarry Smith int bs = a->bs,j,n,bs2 = bs*bs; 5752593348eSBarry Smith 576b6490206SBarry Smith VecGetArray(xx,&xg); x = xg; VecGetArray(yy,&yg); y = yg; 577b6490206SBarry Smith PetscMemzero(y,m*sizeof(Scalar)); 578b6490206SBarry Smith x = x; 5792593348eSBarry Smith idx = a->j; 5802593348eSBarry Smith v = a->a; 5812593348eSBarry Smith ii = a->i; 582b6490206SBarry Smith 583b6490206SBarry Smith switch (bs) { 584b6490206SBarry Smith case 1: 5852593348eSBarry Smith for ( i=0; i<m; i++ ) { 5862593348eSBarry Smith n = ii[1] - ii[0]; ii++; 5872593348eSBarry Smith sum = 0.0; 5882593348eSBarry Smith while (n--) sum += *v++ * x[*idx++]; 5892593348eSBarry Smith y[i] = sum; 5902593348eSBarry Smith } 5912593348eSBarry Smith break; 592b6490206SBarry Smith case 2: 593b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 594de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 595b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; 596b6490206SBarry Smith for ( j=0; j<n; j++ ) { 597b6490206SBarry Smith xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 598b6490206SBarry Smith sum1 += v[0]*x1 + v[2]*x2; 599b6490206SBarry Smith sum2 += v[1]*x1 + v[3]*x2; 600b6490206SBarry Smith v += 4; 601b6490206SBarry Smith } 602b6490206SBarry Smith y[0] += sum1; y[1] += sum2; 603b6490206SBarry Smith y += 2; 604b6490206SBarry Smith } 605b6490206SBarry Smith break; 606b6490206SBarry Smith case 3: 607b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 608de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 609b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 610b6490206SBarry Smith for ( j=0; j<n; j++ ) { 611b6490206SBarry Smith xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 612b6490206SBarry Smith sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 613b6490206SBarry Smith sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 614b6490206SBarry Smith sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 615b6490206SBarry Smith v += 9; 616b6490206SBarry Smith } 617b6490206SBarry Smith y[0] += sum1; y[1] += sum2; y[2] += sum3; 618b6490206SBarry Smith y += 3; 619b6490206SBarry Smith } 620b6490206SBarry Smith break; 621b6490206SBarry Smith case 4: 622b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 623de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 624b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 625b6490206SBarry Smith for ( j=0; j<n; j++ ) { 626b6490206SBarry Smith xb = x + 4*(*idx++); 627b6490206SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 628b6490206SBarry Smith sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 629b6490206SBarry Smith sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 630b6490206SBarry Smith sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 631b6490206SBarry Smith sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 632b6490206SBarry Smith v += 16; 633b6490206SBarry Smith } 634b6490206SBarry Smith y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; 635b6490206SBarry Smith y += 4; 636b6490206SBarry Smith } 637b6490206SBarry Smith break; 638b6490206SBarry Smith case 5: 639b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 640de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 641b6490206SBarry Smith sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 642b6490206SBarry Smith for ( j=0; j<n; j++ ) { 643b6490206SBarry Smith xb = x + 5*(*idx++); 644b6490206SBarry Smith x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 645b6490206SBarry Smith sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 646b6490206SBarry Smith sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 647b6490206SBarry Smith sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 648b6490206SBarry Smith sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 649b6490206SBarry Smith sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 650b6490206SBarry Smith v += 25; 651b6490206SBarry Smith } 652b6490206SBarry Smith y[0] += sum1; y[1] += sum2; y[2] += sum3; y[3] += sum4; y[4] += sum5; 653b6490206SBarry Smith y += 5; 654b6490206SBarry Smith } 655b6490206SBarry Smith break; 656b6490206SBarry Smith /* block sizes larger then 5 by 5 are handled by BLAS */ 657b6490206SBarry Smith default: { 658de6a44a3SBarry Smith int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 659de6a44a3SBarry Smith if (!a->mult_work) { 660de6a44a3SBarry Smith a->mult_work = (Scalar *) PetscMalloc(a->m*sizeof(Scalar)); 661de6a44a3SBarry Smith CHKPTRQ(a->mult_work); 662de6a44a3SBarry Smith } 663de6a44a3SBarry Smith work = a->mult_work; 664b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 665de6a44a3SBarry Smith n = ii[1] - ii[0]; ii++; 666de6a44a3SBarry Smith ncols = n*bs; 667de6a44a3SBarry Smith workt = work; 668b6490206SBarry Smith for ( j=0; j<n; j++ ) { 669b6490206SBarry Smith xb = x + bs*(*idx++); 670de6a44a3SBarry Smith for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 671de6a44a3SBarry Smith workt += bs; 672b6490206SBarry Smith } 673de6a44a3SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,y,&_One); 674de6a44a3SBarry Smith v += n*bs2; 675b6490206SBarry Smith y += bs; 6762593348eSBarry Smith } 6772593348eSBarry Smith } 6782593348eSBarry Smith } 679b6490206SBarry Smith PLogFlops(2*bs2*a->nz - m); 6802593348eSBarry Smith return 0; 6812593348eSBarry Smith } 6822593348eSBarry Smith 683de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 6842593348eSBarry Smith { 685b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 686bcd2baecSBarry Smith if (nz) *nz = a->bs*a->bs*a->nz; 687bcd2baecSBarry Smith if (nza) *nza = a->maxnz; 688bcd2baecSBarry Smith if (mem) *mem = (int)A->mem; 6892593348eSBarry Smith return 0; 6902593348eSBarry Smith } 6912593348eSBarry Smith 69291d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 69391d316f6SSatish Balay { 69491d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 69591d316f6SSatish Balay 69691d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 69791d316f6SSatish Balay 69891d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 69991d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 70091d316f6SSatish Balay (a->nz != b->nz)||(a->indexshift != b->indexshift)) { 70191d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 70291d316f6SSatish Balay } 70391d316f6SSatish Balay 70491d316f6SSatish Balay /* if the a->i are the same */ 70591d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 70691d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 70791d316f6SSatish Balay } 70891d316f6SSatish Balay 70991d316f6SSatish Balay /* if a->j are the same */ 71091d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 71191d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 71291d316f6SSatish Balay } 71391d316f6SSatish Balay 71491d316f6SSatish Balay /* if a->a are the same */ 71591d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 71691d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 71791d316f6SSatish Balay } 71891d316f6SSatish Balay *flg = PETSC_TRUE; 71991d316f6SSatish Balay return 0; 72091d316f6SSatish Balay 72191d316f6SSatish Balay } 72291d316f6SSatish Balay 72391d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 72491d316f6SSatish Balay { 72591d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 7267e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 72717e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 72817e48fc4SSatish Balay 72917e48fc4SSatish Balay bs = a->bs; 73017e48fc4SSatish Balay aa = a->a; 73117e48fc4SSatish Balay ai = a->i; 73217e48fc4SSatish Balay aj = a->j; 73317e48fc4SSatish Balay ambs = a->mbs; 7347e67e3f9SSatish Balay bs2 = bs*bs; 73591d316f6SSatish Balay 73691d316f6SSatish Balay VecSet(&zero,v); 73791d316f6SSatish Balay VecGetArray(v,&x); VecGetLocalSize(v,&n); 7389867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 73917e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 74017e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 74117e48fc4SSatish Balay if (aj[j] == i) { 74217e48fc4SSatish Balay row = i*bs; 7437e67e3f9SSatish Balay aa_j = aa+j*bs2; 7447e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 74591d316f6SSatish Balay break; 74691d316f6SSatish Balay } 74791d316f6SSatish Balay } 74891d316f6SSatish Balay } 74991d316f6SSatish Balay return 0; 75091d316f6SSatish Balay } 75191d316f6SSatish Balay 7529867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 7539867e207SSatish Balay { 7549867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 7559867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 7567e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 7579867e207SSatish Balay 7589867e207SSatish Balay ai = a->i; 7599867e207SSatish Balay aj = a->j; 7609867e207SSatish Balay aa = a->a; 7619867e207SSatish Balay m = a->m; 7629867e207SSatish Balay n = a->n; 7639867e207SSatish Balay bs = a->bs; 7649867e207SSatish Balay mbs = a->mbs; 7657e67e3f9SSatish Balay bs2 = bs*bs; 7669867e207SSatish Balay if (ll) { 7679867e207SSatish Balay VecGetArray(ll,&l); VecGetSize(ll,&lm); 7689867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 7699867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 7709867e207SSatish Balay M = ai[i+1] - ai[i]; 7719867e207SSatish Balay li = l + i*bs; 7727e67e3f9SSatish Balay v = aa + bs2*ai[i]; 7739867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 7747e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 7759867e207SSatish Balay (*v++) *= li[k%bs]; 7769867e207SSatish Balay } 7779867e207SSatish Balay } 7789867e207SSatish Balay } 7799867e207SSatish Balay } 7809867e207SSatish Balay 7819867e207SSatish Balay if (rr) { 7829867e207SSatish Balay VecGetArray(rr,&r); VecGetSize(rr,&rn); 7839867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 7849867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 7859867e207SSatish Balay M = ai[i+1] - ai[i]; 7867e67e3f9SSatish Balay v = aa + bs2*ai[i]; 7879867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 7889867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 7899867e207SSatish Balay for ( k=0; k<bs; k++ ) { 7909867e207SSatish Balay x = ri[k]; 7919867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 7929867e207SSatish Balay } 7939867e207SSatish Balay } 7949867e207SSatish Balay } 7959867e207SSatish Balay } 7969867e207SSatish Balay return 0; 7979867e207SSatish Balay } 7989867e207SSatish Balay 7999867e207SSatish Balay 800b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 801b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 802b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 8037fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 8047fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 8057fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 8067fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 8077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 8087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 8092593348eSBarry Smith 810b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 8112593348eSBarry Smith { 812b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8132593348eSBarry Smith Scalar *v = a->a; 8142593348eSBarry Smith double sum = 0.0; 815b6490206SBarry Smith int i; 8162593348eSBarry Smith 8172593348eSBarry Smith if (type == NORM_FROBENIUS) { 8182593348eSBarry Smith for (i=0; i<a->nz; i++ ) { 8192593348eSBarry Smith #if defined(PETSC_COMPLEX) 8202593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 8212593348eSBarry Smith #else 8222593348eSBarry Smith sum += (*v)*(*v); v++; 8232593348eSBarry Smith #endif 8242593348eSBarry Smith } 8252593348eSBarry Smith *norm = sqrt(sum); 8262593348eSBarry Smith } 8272593348eSBarry Smith else { 828b6490206SBarry Smith SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 8292593348eSBarry Smith } 8302593348eSBarry Smith return 0; 8312593348eSBarry Smith } 8322593348eSBarry Smith 8332593348eSBarry Smith /* 8342593348eSBarry Smith note: This can only work for identity for row and col. It would 8352593348eSBarry Smith be good to check this and otherwise generate an error. 8362593348eSBarry Smith */ 837b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 8382593348eSBarry Smith { 839b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 8402593348eSBarry Smith Mat outA; 841de6a44a3SBarry Smith int ierr; 8422593348eSBarry Smith 843b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 8442593348eSBarry Smith 8452593348eSBarry Smith outA = inA; 8462593348eSBarry Smith inA->factor = FACTOR_LU; 8472593348eSBarry Smith a->row = row; 8482593348eSBarry Smith a->col = col; 8492593348eSBarry Smith 850de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 8512593348eSBarry Smith 8522593348eSBarry Smith if (!a->diag) { 853b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 8542593348eSBarry Smith } 8557fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 8562593348eSBarry Smith return 0; 8572593348eSBarry Smith } 8582593348eSBarry Smith 859b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 8602593348eSBarry Smith { 861b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 862b6490206SBarry Smith int one = 1, totalnz = a->bs*a->bs*a->nz; 863b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 864b6490206SBarry Smith PLogFlops(totalnz); 8652593348eSBarry Smith return 0; 8662593348eSBarry Smith } 8672593348eSBarry Smith 8687e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 8697e67e3f9SSatish Balay { 8707e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8717e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 8727e67e3f9SSatish Balay int *ai = a->i, *ailen = a->ilen, shift = a->indexshift; 8737e67e3f9SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=bs*bs; 8747e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 8757e67e3f9SSatish Balay 8767e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 8777e67e3f9SSatish Balay row = im[k]; brow = row/bs; 8787e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 8797e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 8807e67e3f9SSatish Balay rp = aj + ai[brow] + shift; ap = aa + bs2*ai[brow] + shift; 8817e67e3f9SSatish Balay nrow = ailen[brow]; 8827e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 8837e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 8847e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 8857e67e3f9SSatish Balay col = in[l] - shift; 8867e67e3f9SSatish Balay bcol = col/bs; 8877e67e3f9SSatish Balay cidx = col%bs; 8887e67e3f9SSatish Balay ridx = row%bs; 8897e67e3f9SSatish Balay high = nrow; 8907e67e3f9SSatish Balay low = 0; /* assume unsorted */ 8917e67e3f9SSatish Balay while (high-low > 5) { 8927e67e3f9SSatish Balay t = (low+high)/2; 8937e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 8947e67e3f9SSatish Balay else low = t; 8957e67e3f9SSatish Balay } 8967e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 8977e67e3f9SSatish Balay if (rp[i] > bcol) break; 8987e67e3f9SSatish Balay if (rp[i] == bcol) { 8997e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 9007e67e3f9SSatish Balay goto finished; 9017e67e3f9SSatish Balay } 9027e67e3f9SSatish Balay } 9037e67e3f9SSatish Balay *v++ = zero; 9047e67e3f9SSatish Balay finished:; 9057e67e3f9SSatish Balay } 9067e67e3f9SSatish Balay } 9077e67e3f9SSatish Balay return 0; 9087e67e3f9SSatish Balay } 9097e67e3f9SSatish Balay 910b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A) 9112593348eSBarry Smith { 9122593348eSBarry Smith static int called = 0; 9132593348eSBarry Smith 9142593348eSBarry Smith if (called) return 0; else called = 1; 9152593348eSBarry Smith return 0; 9162593348eSBarry Smith } 9172593348eSBarry Smith /* -------------------------------------------------------------------*/ 918cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 9199867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 920b6490206SBarry Smith MatMult_SeqBAIJ,0, 921b6490206SBarry Smith 0,0, 922de6a44a3SBarry Smith MatSolve_SeqBAIJ,0, 923b6490206SBarry Smith 0,0, 924de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 925b6490206SBarry Smith 0, 926f2501298SSatish Balay MatTranspose_SeqBAIJ, 92717e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 9289867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 929584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 930b6490206SBarry Smith 0, 931b6490206SBarry Smith MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 932b6490206SBarry Smith MatGetReordering_SeqBAIJ, 9337fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 934b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 935de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 936b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 937b6490206SBarry Smith 0,0, 938b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 939b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 940b6490206SBarry Smith 0,0, 9417e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 942b6490206SBarry Smith MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 943b6490206SBarry Smith 0}; 9442593348eSBarry Smith 9452593348eSBarry Smith /*@C 946*44cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 947*44cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 948*44cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 9492593348eSBarry Smith (or nzz). By setting these parameters accurately, performance can be 9502593348eSBarry Smith increased by more than a factor of 50. 9512593348eSBarry Smith 9522593348eSBarry Smith Input Parameters: 9532593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 954b6490206SBarry Smith . bs - size of block 9552593348eSBarry Smith . m - number of rows 9562593348eSBarry Smith . n - number of columns 957b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 958b6490206SBarry Smith . nzz - number of block nonzeros per block row or PETSC_NULL 959b6490206SBarry Smith (possibly different for each row) 9602593348eSBarry Smith 9612593348eSBarry Smith Output Parameter: 9622593348eSBarry Smith . A - the matrix 9632593348eSBarry Smith 9642593348eSBarry Smith Notes: 965*44cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 9662593348eSBarry Smith storage. That is, the stored row and column indices can begin at 967*44cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 9682593348eSBarry Smith 9692593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 9702593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 9712593348eSBarry Smith allocation. For additional details, see the users manual chapter on 9722593348eSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 9732593348eSBarry Smith 974*44cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 9752593348eSBarry Smith @*/ 976b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 9772593348eSBarry Smith { 9782593348eSBarry Smith Mat B; 979b6490206SBarry Smith Mat_SeqBAIJ *b; 980f2501298SSatish Balay int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 981b6490206SBarry Smith 982f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 983f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 9842593348eSBarry Smith 9852593348eSBarry Smith *A = 0; 986b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 9872593348eSBarry Smith PLogObjectCreate(B); 988b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 989*44cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 9902593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 9917fc0212eSBarry Smith switch (bs) { 9927fc0212eSBarry Smith case 1: 9937fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 9947fc0212eSBarry Smith break; 9954eeb42bcSBarry Smith case 2: 9964eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 9976d84be18SBarry Smith break; 998f327dd97SBarry Smith case 3: 999f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 10004eeb42bcSBarry Smith break; 10016d84be18SBarry Smith case 4: 10026d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 10036d84be18SBarry Smith break; 10046d84be18SBarry Smith case 5: 10056d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 10066d84be18SBarry Smith break; 10077fc0212eSBarry Smith } 1008b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1009b6490206SBarry Smith B->view = MatView_SeqBAIJ; 10102593348eSBarry Smith B->factor = 0; 10112593348eSBarry Smith B->lupivotthreshold = 1.0; 10122593348eSBarry Smith b->row = 0; 10132593348eSBarry Smith b->col = 0; 10142593348eSBarry Smith b->reallocs = 0; 10152593348eSBarry Smith 1016*44cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 1017*44cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1018b6490206SBarry Smith b->mbs = mbs; 1019f2501298SSatish Balay b->nbs = nbs; 1020b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 10212593348eSBarry Smith if (nnz == PETSC_NULL) { 10227e67e3f9SSatish Balay if (nz == PETSC_DEFAULT) nz = CHUNKSIZE; 10232593348eSBarry Smith else if (nz <= 0) nz = 1; 1024b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1025b6490206SBarry Smith nz = nz*mbs; 10262593348eSBarry Smith } 10272593348eSBarry Smith else { 10282593348eSBarry Smith nz = 0; 1029b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 10302593348eSBarry Smith } 10312593348eSBarry Smith 10322593348eSBarry Smith /* allocate the matrix space */ 10337e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 10342593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 10357e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 10367e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 10372593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 10382593348eSBarry Smith b->i = b->j + nz; 10392593348eSBarry Smith b->singlemalloc = 1; 10402593348eSBarry Smith 1041de6a44a3SBarry Smith b->i[0] = 0; 1042b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 10432593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 10442593348eSBarry Smith } 10452593348eSBarry Smith 1046b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1047b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1048b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1049b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 10502593348eSBarry Smith 1051b6490206SBarry Smith b->bs = bs; 1052b6490206SBarry Smith b->mbs = mbs; 10532593348eSBarry Smith b->nz = 0; 10542593348eSBarry Smith b->maxnz = nz; 10552593348eSBarry Smith b->sorted = 0; 10562593348eSBarry Smith b->roworiented = 1; 10572593348eSBarry Smith b->nonew = 0; 10582593348eSBarry Smith b->diag = 0; 10592593348eSBarry Smith b->solve_work = 0; 1060de6a44a3SBarry Smith b->mult_work = 0; 10612593348eSBarry Smith b->spptr = 0; 10621073c447SSatish Balay b->indexshift = 0; 10632593348eSBarry Smith *A = B; 10642593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 10652593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 10662593348eSBarry Smith return 0; 10672593348eSBarry Smith } 10682593348eSBarry Smith 1069b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 10702593348eSBarry Smith { 10712593348eSBarry Smith Mat C; 1072b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 10737e67e3f9SSatish Balay int i,len, mbs = a->mbs, bs = a->bs,nz = a->nz,bs2 = bs*bs; 1074de6a44a3SBarry Smith 1075de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 10762593348eSBarry Smith 10772593348eSBarry Smith *B = 0; 1078b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 10792593348eSBarry Smith PLogObjectCreate(C); 1080b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 10812593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1082b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1083b6490206SBarry Smith C->view = MatView_SeqBAIJ; 10842593348eSBarry Smith C->factor = A->factor; 10852593348eSBarry Smith c->row = 0; 10862593348eSBarry Smith c->col = 0; 10872593348eSBarry Smith C->assembled = PETSC_TRUE; 10882593348eSBarry Smith 1089*44cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 1090*44cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 1091*44cd7ae7SLois Curfman McInnes C->M = a->m; 1092*44cd7ae7SLois Curfman McInnes C->N = a->n; 1093*44cd7ae7SLois Curfman McInnes 1094b6490206SBarry Smith c->bs = a->bs; 1095b6490206SBarry Smith c->mbs = a->mbs; 1096b6490206SBarry Smith c->nbs = a->nbs; 10971073c447SSatish Balay c->indexshift = 0; 10982593348eSBarry Smith 1099b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1100b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1101b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 11022593348eSBarry Smith c->imax[i] = a->imax[i]; 11032593348eSBarry Smith c->ilen[i] = a->ilen[i]; 11042593348eSBarry Smith } 11052593348eSBarry Smith 11062593348eSBarry Smith /* allocate the matrix space */ 11072593348eSBarry Smith c->singlemalloc = 1; 11087e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 11092593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 11107e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1111de6a44a3SBarry Smith c->i = c->j + nz; 1112b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1113b6490206SBarry Smith if (mbs > 0) { 1114de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 11152593348eSBarry Smith if (cpvalues == COPY_VALUES) { 11167e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 11172593348eSBarry Smith } 11182593348eSBarry Smith } 11192593348eSBarry Smith 1120b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 11212593348eSBarry Smith c->sorted = a->sorted; 11222593348eSBarry Smith c->roworiented = a->roworiented; 11232593348eSBarry Smith c->nonew = a->nonew; 11242593348eSBarry Smith 11252593348eSBarry Smith if (a->diag) { 1126b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1127b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1128b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 11292593348eSBarry Smith c->diag[i] = a->diag[i]; 11302593348eSBarry Smith } 11312593348eSBarry Smith } 11322593348eSBarry Smith else c->diag = 0; 11332593348eSBarry Smith c->nz = a->nz; 11342593348eSBarry Smith c->maxnz = a->maxnz; 11352593348eSBarry Smith c->solve_work = 0; 11362593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 11377fc0212eSBarry Smith c->mult_work = 0; 11382593348eSBarry Smith *B = C; 11392593348eSBarry Smith return 0; 11402593348eSBarry Smith } 11412593348eSBarry Smith 114219bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 11432593348eSBarry Smith { 1144b6490206SBarry Smith Mat_SeqBAIJ *a; 11452593348eSBarry Smith Mat B; 1146de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1147b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 114835aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1149de6a44a3SBarry Smith int *masked, nmask,tmp,ishift,bs2; 1150b6490206SBarry Smith Scalar *aa; 115119bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 11522593348eSBarry Smith 115335aab85fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1154de6a44a3SBarry Smith bs2 = bs*bs; 1155b6490206SBarry Smith 11562593348eSBarry Smith MPI_Comm_size(comm,&size); 1157b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 115890ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 115977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1160de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 11612593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 11622593348eSBarry Smith 1163b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 116435aab85fSBarry Smith 116535aab85fSBarry Smith /* 116635aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 116735aab85fSBarry Smith divisible by the blocksize 116835aab85fSBarry Smith */ 1169b6490206SBarry Smith mbs = M/bs; 117035aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 117135aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 117235aab85fSBarry Smith else mbs++; 117335aab85fSBarry Smith if (extra_rows) { 117435aab85fSBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 117535aab85fSBarry Smith } 1176b6490206SBarry Smith 11772593348eSBarry Smith /* read in row lengths */ 117835aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 117977c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 118035aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 11812593348eSBarry Smith 1182b6490206SBarry Smith /* read in column indices */ 118335aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 118477c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 118535aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1186b6490206SBarry Smith 1187b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1188b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1189b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 119035aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 119135aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 119235aab85fSBarry Smith masked = mask + mbs; 1193b6490206SBarry Smith rowcount = 0; nzcount = 0; 1194b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 119535aab85fSBarry Smith nmask = 0; 1196b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1197b6490206SBarry Smith kmax = rowlengths[rowcount]; 1198b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 119935aab85fSBarry Smith tmp = jj[nzcount++]/bs; 120035aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1201b6490206SBarry Smith } 1202b6490206SBarry Smith rowcount++; 1203b6490206SBarry Smith } 120435aab85fSBarry Smith browlengths[i] += nmask; 120535aab85fSBarry Smith /* zero out the mask elements we set */ 120635aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1207b6490206SBarry Smith } 1208b6490206SBarry Smith 12092593348eSBarry Smith /* create our matrix */ 121035aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 121135aab85fSBarry Smith CHKERRQ(ierr); 12122593348eSBarry Smith B = *A; 1213b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 12142593348eSBarry Smith 12152593348eSBarry Smith /* set matrix "i" values */ 1216de6a44a3SBarry Smith a->i[0] = 0; 1217b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1218b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1219b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 12202593348eSBarry Smith } 12219867e207SSatish Balay a->indexshift = 0; 1222b6490206SBarry Smith a->nz = 0; 1223b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 12242593348eSBarry Smith 1225b6490206SBarry Smith /* read in nonzero values */ 122635aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 122777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 122835aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1229b6490206SBarry Smith 1230b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1231b6490206SBarry Smith nzcount = 0; jcount = 0; 1232b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 1233b6490206SBarry Smith nzcountb = nzcount; 123435aab85fSBarry Smith nmask = 0; 1235b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1236b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1237b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 123835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 123935aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1240b6490206SBarry Smith } 1241b6490206SBarry Smith rowcount++; 1242b6490206SBarry Smith } 1243de6a44a3SBarry Smith /* sort the masked values */ 124477c4ece6SBarry Smith PetscSortInt(nmask,masked); 1245de6a44a3SBarry Smith 1246b6490206SBarry Smith /* set "j" values into matrix */ 1247b6490206SBarry Smith maskcount = 1; 124835aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 124935aab85fSBarry Smith a->j[jcount++] = masked[j]; 1250de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1251b6490206SBarry Smith } 1252b6490206SBarry Smith /* set "a" values into matrix */ 1253de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1254b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1255b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1256b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 1257de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1258de6a44a3SBarry Smith block = mask[tmp] - 1; 1259de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1260de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1261b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 1262b6490206SBarry Smith } 1263b6490206SBarry Smith } 126435aab85fSBarry Smith /* zero out the mask elements we set */ 126535aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1266b6490206SBarry Smith } 126735aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1268b6490206SBarry Smith 1269b6490206SBarry Smith PetscFree(rowlengths); 1270b6490206SBarry Smith PetscFree(browlengths); 1271b6490206SBarry Smith PetscFree(aa); 1272b6490206SBarry Smith PetscFree(jj); 1273b6490206SBarry Smith PetscFree(mask); 1274b6490206SBarry Smith 1275b6490206SBarry Smith B->assembled = PETSC_TRUE; 1276de6a44a3SBarry Smith 1277de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1278de6a44a3SBarry Smith if (flg) { 127919bcc07fSBarry Smith Viewer tviewer; 128019bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 128190ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 128219bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 128319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1284de6a44a3SBarry Smith } 1285de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1286de6a44a3SBarry Smith if (flg) { 128719bcc07fSBarry Smith Viewer tviewer; 128819bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 128990ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 129019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 129119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1292de6a44a3SBarry Smith } 1293de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1294de6a44a3SBarry Smith if (flg) { 129519bcc07fSBarry Smith Viewer tviewer; 129619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 129719bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 129819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1299de6a44a3SBarry Smith } 1300de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1301de6a44a3SBarry Smith if (flg) { 130219bcc07fSBarry Smith Viewer tviewer; 130319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 130490ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 130519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 130619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1307de6a44a3SBarry Smith } 1308de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1309de6a44a3SBarry Smith if (flg) { 131019bcc07fSBarry Smith Viewer tviewer; 131119bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 131219bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 131319bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 131419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1315de6a44a3SBarry Smith } 13162593348eSBarry Smith return 0; 13172593348eSBarry Smith } 13182593348eSBarry Smith 13192593348eSBarry Smith 13202593348eSBarry Smith 1321