1b6490206SBarry Smith 22593348eSBarry Smith #ifndef lint 3*3b547af2SSatish Balay static char vcid[] = "$Id: baij.c,v 1.36 1996/04/11 16:15:49 balay Exp balay $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 10b6490206SBarry Smith #include "baij.h" 112593348eSBarry Smith #include "vec/vecimpl.h" 122593348eSBarry Smith #include "inline/spops.h" 132593348eSBarry Smith #include "petsc.h" 14*3b547af2SSatish Balay 15bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 162593348eSBarry Smith 17b6490206SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 182593348eSBarry Smith { 19b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20bcd2baecSBarry Smith int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 212593348eSBarry Smith 222593348eSBarry Smith /* 232593348eSBarry Smith this is tacky: In the future when we have written special factorization 242593348eSBarry Smith and solve routines for the identity permutation we should use a 252593348eSBarry Smith stride index set instead of the general one. 262593348eSBarry Smith */ 272593348eSBarry Smith if (type == ORDER_NATURAL) { 282593348eSBarry Smith idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 292593348eSBarry Smith for ( i=0; i<n; i++ ) idx[i] = i; 302593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 312593348eSBarry Smith ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 322593348eSBarry Smith PetscFree(idx); 332593348eSBarry Smith ISSetPermutation(*rperm); 342593348eSBarry Smith ISSetPermutation(*cperm); 352593348eSBarry Smith ISSetIdentity(*rperm); 362593348eSBarry Smith ISSetIdentity(*cperm); 372593348eSBarry Smith return 0; 382593348eSBarry Smith } 392593348eSBarry Smith 40bcd2baecSBarry Smith MatReorderingRegisterAll(); 41a7c10996SSatish Balay ishift = 0; 42bcd2baecSBarry Smith oshift = -MatReorderingIndexShift[(int)type]; 43bcd2baecSBarry Smith if (MatReorderingRequiresSymmetric[(int)type]) { 44bcd2baecSBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45bcd2baecSBarry Smith CHKERRQ(ierr); 46bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 472593348eSBarry Smith PetscFree(ia); PetscFree(ja); 48bcd2baecSBarry Smith } else { 49bcd2baecSBarry Smith if (ishift == oshift) { 50bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51bcd2baecSBarry Smith } 52bcd2baecSBarry Smith else if (ishift == -1) { 53bcd2baecSBarry Smith /* temporarily subtract 1 from i and j indices */ 54bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 55bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 56bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 59bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60bcd2baecSBarry Smith } else { 61bcd2baecSBarry Smith /* temporarily add 1 to i and j indices */ 62bcd2baecSBarry Smith int nz = a->i[a->n] - 1; 63bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 64bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65bcd2baecSBarry Smith ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 67bcd2baecSBarry Smith for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68bcd2baecSBarry Smith } 69bcd2baecSBarry Smith } 702593348eSBarry Smith return 0; 712593348eSBarry Smith } 722593348eSBarry Smith 73de6a44a3SBarry Smith /* 74de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 75de6a44a3SBarry Smith */ 76de6a44a3SBarry Smith 77de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 78de6a44a3SBarry Smith { 79de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 807fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 81de6a44a3SBarry Smith 82de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 847fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 85de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86de6a44a3SBarry Smith if (a->j[j] == i) { 87de6a44a3SBarry Smith diag[i] = j; 88de6a44a3SBarry Smith break; 89de6a44a3SBarry Smith } 90de6a44a3SBarry Smith } 91de6a44a3SBarry Smith } 92de6a44a3SBarry Smith a->diag = diag; 93de6a44a3SBarry Smith return 0; 94de6a44a3SBarry Smith } 952593348eSBarry Smith 962593348eSBarry Smith #include "draw.h" 972593348eSBarry Smith #include "pinclude/pviewer.h" 9877c4ece6SBarry Smith #include "sys.h" 992593348eSBarry Smith 100b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 1012593348eSBarry Smith { 102b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1039df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 104b6490206SBarry Smith Scalar *aa; 1052593348eSBarry Smith 10690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1072593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1082593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1092593348eSBarry Smith col_lens[1] = a->m; 1102593348eSBarry Smith col_lens[2] = a->n; 1117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1122593348eSBarry Smith 1132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 114b6490206SBarry Smith count = 0; 115b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 116b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 117b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118b6490206SBarry Smith } 1192593348eSBarry Smith } 12077c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1212593348eSBarry Smith PetscFree(col_lens); 1222593348eSBarry Smith 1232593348eSBarry Smith /* store column indices (zero start index) */ 1247e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125b6490206SBarry Smith count = 0; 126b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 127b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 128b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 130b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1312593348eSBarry Smith } 1322593348eSBarry Smith } 133b6490206SBarry Smith } 134b6490206SBarry Smith } 1357e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136b6490206SBarry Smith PetscFree(jj); 1372593348eSBarry Smith 1382593348eSBarry Smith /* store nonzero values */ 1397e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140b6490206SBarry Smith count = 0; 141b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 142b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 143b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 146b6490206SBarry Smith } 147b6490206SBarry Smith } 148b6490206SBarry Smith } 149b6490206SBarry Smith } 1507e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151b6490206SBarry Smith PetscFree(aa); 1522593348eSBarry Smith return 0; 1532593348eSBarry Smith } 1542593348eSBarry Smith 155b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1562593348eSBarry Smith { 157b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1589df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1592593348eSBarry Smith FILE *fd; 1602593348eSBarry Smith char *outputname; 1612593348eSBarry Smith 16290ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1632593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16490ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 1657ddc982cSLois Curfman McInnes if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 1667ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1672593348eSBarry Smith } 16890ace30eSBarry Smith else if (format == ASCII_FORMAT_MATLAB) { 169b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1702593348eSBarry Smith } 17144cd7ae7SLois Curfman McInnes else if (format == ASCII_FORMAT_COMMON) { 17244cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17344cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17444cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17544cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17644cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17744cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17844cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17944cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 18044cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 18144cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18344cd7ae7SLois Curfman McInnes #else 18444cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18544cd7ae7SLois Curfman McInnes fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18644cd7ae7SLois Curfman McInnes #endif 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19044cd7ae7SLois Curfman McInnes } 19144cd7ae7SLois Curfman McInnes } 19244cd7ae7SLois Curfman McInnes } 1932593348eSBarry Smith else { 194b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 195b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 196b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 197b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 2007e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 20188685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2027e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 20488685aaeSLois Curfman McInnes else { 2057e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20688685aaeSLois Curfman McInnes } 20788685aaeSLois Curfman McInnes #else 2087e67e3f9SSatish Balay fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20988685aaeSLois Curfman McInnes #endif 2102593348eSBarry Smith } 2112593348eSBarry Smith } 2122593348eSBarry Smith fprintf(fd,"\n"); 2132593348eSBarry Smith } 2142593348eSBarry Smith } 215b6490206SBarry Smith } 2162593348eSBarry Smith fflush(fd); 2172593348eSBarry Smith return 0; 2182593348eSBarry Smith } 2192593348eSBarry Smith 220b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 2212593348eSBarry Smith { 2222593348eSBarry Smith Mat A = (Mat) obj; 22319bcc07fSBarry Smith ViewerType vtype; 22419bcc07fSBarry Smith int ierr; 2252593348eSBarry Smith 2262593348eSBarry Smith if (!viewer) { 22719bcc07fSBarry Smith viewer = STDOUT_VIEWER_SELF; 2282593348eSBarry Smith } 22919bcc07fSBarry Smith 23019bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 23119bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 232b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 2332593348eSBarry Smith } 23419bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 2362593348eSBarry Smith } 23719bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 238b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 2392593348eSBarry Smith } 24019bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 241b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 2422593348eSBarry Smith } 2432593348eSBarry Smith return 0; 2442593348eSBarry Smith } 245b6490206SBarry Smith 246119a934aSSatish Balay #define CHUNKSIZE 10 247cd0e1443SSatish Balay 248cd0e1443SSatish Balay /* This version has row oriented v */ 249cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250cd0e1443SSatish Balay { 251cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 2559df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 256cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 257cd0e1443SSatish Balay 258cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 259cd0e1443SSatish Balay row = im[k]; brow = row/bs; 260cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 262a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 263cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 264cd0e1443SSatish Balay low = 0; 265cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 266cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268a7c10996SSatish Balay col = in[l]; bcol = col/bs; 269cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 270cd0e1443SSatish Balay if (roworiented) { 271cd0e1443SSatish Balay value = *v++; 272cd0e1443SSatish Balay } 273cd0e1443SSatish Balay else { 274cd0e1443SSatish Balay value = v[k + l*m]; 275cd0e1443SSatish Balay } 276cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 277cd0e1443SSatish Balay while (high-low > 5) { 278cd0e1443SSatish Balay t = (low+high)/2; 279cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 280cd0e1443SSatish Balay else low = t; 281cd0e1443SSatish Balay } 282cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 283cd0e1443SSatish Balay if (rp[i] > bcol) break; 284cd0e1443SSatish Balay if (rp[i] == bcol) { 2857e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 286cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 287cd0e1443SSatish Balay else *bap = value; 288cd0e1443SSatish Balay goto noinsert; 289cd0e1443SSatish Balay } 290cd0e1443SSatish Balay } 291cd0e1443SSatish Balay if (nonew) goto noinsert; 292cd0e1443SSatish Balay if (nrow >= rmax) { 293cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 294cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295cd0e1443SSatish Balay Scalar *new_a; 296cd0e1443SSatish Balay 297cd0e1443SSatish Balay /* malloc new storage space */ 2987e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 3007e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 301cd0e1443SSatish Balay new_i = new_j + new_nz; 302cd0e1443SSatish Balay 303cd0e1443SSatish Balay /* copy over old data into new slots */ 304cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 3057e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 306a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 307a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 308a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 309cd0e1443SSatish Balay len*sizeof(int)); 310a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 311a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 312a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 313a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 314cd0e1443SSatish Balay /* free up old matrix storage */ 315cd0e1443SSatish Balay PetscFree(a->a); 316cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318cd0e1443SSatish Balay a->singlemalloc = 1; 319cd0e1443SSatish Balay 320a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 321cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 3227e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323119a934aSSatish Balay a->maxnz += CHUNKSIZE; 324cd0e1443SSatish Balay a->reallocs++; 325119a934aSSatish Balay a->nz++; 326cd0e1443SSatish Balay } 3277e67e3f9SSatish Balay N = nrow++ - 1; 328cd0e1443SSatish Balay /* shift up all the later entries in this row */ 329cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 330cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 3317e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332cd0e1443SSatish Balay } 3337e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334cd0e1443SSatish Balay rp[i] = bcol; 3357e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 336cd0e1443SSatish Balay noinsert:; 337cd0e1443SSatish Balay low = i; 338cd0e1443SSatish Balay } 339cd0e1443SSatish Balay ailen[brow] = nrow; 340cd0e1443SSatish Balay } 341cd0e1443SSatish Balay return 0; 342cd0e1443SSatish Balay } 343cd0e1443SSatish Balay 3440b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 3450b824a48SSatish Balay { 3460b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3470b824a48SSatish Balay *m = a->m; *n = a->n; 3480b824a48SSatish Balay return 0; 3490b824a48SSatish Balay } 3500b824a48SSatish Balay 3510b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 3520b824a48SSatish Balay { 3530b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3540b824a48SSatish Balay *m = 0; *n = a->m; 3550b824a48SSatish Balay return 0; 3560b824a48SSatish Balay } 3570b824a48SSatish Balay 3589867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 3599867e207SSatish Balay { 3609867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 3617e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 3629867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 3639867e207SSatish Balay 3649867e207SSatish Balay bs = a->bs; 3659867e207SSatish Balay ai = a->i; 3669867e207SSatish Balay aj = a->j; 3679867e207SSatish Balay aa = a->a; 3689df24120SSatish Balay bs2 = a->bs2; 3699867e207SSatish Balay 3709867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 3719867e207SSatish Balay 3729867e207SSatish Balay bn = row/bs; /* Block number */ 3739867e207SSatish Balay bp = row % bs; /* Block Position */ 3749867e207SSatish Balay M = ai[bn+1] - ai[bn]; 3759867e207SSatish Balay *nz = bs*M; 3769867e207SSatish Balay 3779867e207SSatish Balay if (v) { 3789867e207SSatish Balay *v = 0; 3799867e207SSatish Balay if (*nz) { 3809867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 3819867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3829867e207SSatish Balay v_i = *v + i*bs; 3837e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 3847e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 3859867e207SSatish Balay } 3869867e207SSatish Balay } 3879867e207SSatish Balay } 3889867e207SSatish Balay 3899867e207SSatish Balay if (idx) { 3909867e207SSatish Balay *idx = 0; 3919867e207SSatish Balay if (*nz) { 3929867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 3939867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 3949867e207SSatish Balay idx_i = *idx + i*bs; 3959867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 3969867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 3979867e207SSatish Balay } 3989867e207SSatish Balay } 3999867e207SSatish Balay } 4009867e207SSatish Balay return 0; 4019867e207SSatish Balay } 4029867e207SSatish Balay 4039867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4049867e207SSatish Balay { 4059867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 4069867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 4079867e207SSatish Balay return 0; 4089867e207SSatish Balay } 409b6490206SBarry Smith 410f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411f2501298SSatish Balay { 412f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413f2501298SSatish Balay Mat C; 414f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 4159df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 416f2501298SSatish Balay Scalar *array=a->a; 417f2501298SSatish Balay 418f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 419f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 422a7c10996SSatish Balay 423a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 424f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 425f2501298SSatish Balay PetscFree(col); 426f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 427f2501298SSatish Balay cols = rows + bs; 428f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 429f2501298SSatish Balay cols[0] = i*bs; 430f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 431f2501298SSatish Balay len = ai[i+1] - ai[i]; 432f2501298SSatish Balay for ( j=0; j<len; j++ ) { 433f2501298SSatish Balay rows[0] = (*aj++)*bs; 434f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 435f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 436f2501298SSatish Balay array += bs2; 437f2501298SSatish Balay } 438f2501298SSatish Balay } 4391073c447SSatish Balay PetscFree(rows); 440f2501298SSatish Balay 441f2501298SSatish Balay ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442f2501298SSatish Balay ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 443f2501298SSatish Balay 444f2501298SSatish Balay if (B != PETSC_NULL) { 445f2501298SSatish Balay *B = C; 446f2501298SSatish Balay } else { 447f2501298SSatish Balay /* This isn't really an in-place transpose */ 448f2501298SSatish Balay PetscFree(a->a); 449f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 450f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 451f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 452f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 453f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 454f2501298SSatish Balay PetscFree(a); 455f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 456f2501298SSatish Balay PetscHeaderDestroy(C); 457f2501298SSatish Balay } 458f2501298SSatish Balay return 0; 459f2501298SSatish Balay } 460f2501298SSatish Balay 461f2501298SSatish Balay 462584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 463584200bdSSatish Balay { 464584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 465584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 466a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 4679df24120SSatish Balay int mbs = a->mbs, bs2 = a->bs2; 468584200bdSSatish Balay Scalar *aa = a->a, *ap; 469584200bdSSatish Balay 470584200bdSSatish Balay if (mode == FLUSH_ASSEMBLY) return 0; 471584200bdSSatish Balay 472584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 473584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 474584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 475584200bdSSatish Balay if (fshift) { 476a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 477584200bdSSatish Balay N = ailen[i]; 478584200bdSSatish Balay for ( j=0; j<N; j++ ) { 479584200bdSSatish Balay ip[j-fshift] = ip[j]; 4807e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 481584200bdSSatish Balay } 482584200bdSSatish Balay } 483584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 484584200bdSSatish Balay } 485584200bdSSatish Balay if (mbs) { 486584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 487584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 488584200bdSSatish Balay } 489584200bdSSatish Balay /* reset ilen and imax for each row */ 490584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 491584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 492584200bdSSatish Balay } 493a7c10996SSatish Balay a->nz = ai[mbs]; 494584200bdSSatish Balay 495584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 496584200bdSSatish Balay if (fshift && a->diag) { 497584200bdSSatish Balay PetscFree(a->diag); 498584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 499584200bdSSatish Balay a->diag = 0; 500584200bdSSatish Balay } 501a7c10996SSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 502584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 503584200bdSSatish Balay a->reallocs); 504584200bdSSatish Balay return 0; 505584200bdSSatish Balay } 506584200bdSSatish Balay 507b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 5082593348eSBarry Smith { 509b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5109df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 5112593348eSBarry Smith return 0; 5122593348eSBarry Smith } 5132593348eSBarry Smith 514b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 5152593348eSBarry Smith { 5162593348eSBarry Smith Mat A = (Mat) obj; 517b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5182593348eSBarry Smith 5192593348eSBarry Smith #if defined(PETSC_LOG) 5202593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 5212593348eSBarry Smith #endif 5222593348eSBarry Smith PetscFree(a->a); 5232593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 5242593348eSBarry Smith if (a->diag) PetscFree(a->diag); 5252593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 5262593348eSBarry Smith if (a->imax) PetscFree(a->imax); 5272593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 528de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 5292593348eSBarry Smith PetscFree(a); 530f2655603SLois Curfman McInnes PLogObjectDestroy(A); 531f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 5322593348eSBarry Smith return 0; 5332593348eSBarry Smith } 5342593348eSBarry Smith 535b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 5362593348eSBarry Smith { 537b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 5382593348eSBarry Smith if (op == ROW_ORIENTED) a->roworiented = 1; 5392593348eSBarry Smith else if (op == COLUMN_ORIENTED) a->roworiented = 0; 5402593348eSBarry Smith else if (op == COLUMNS_SORTED) a->sorted = 1; 5412593348eSBarry Smith else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 5422593348eSBarry Smith else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 5432593348eSBarry Smith else if (op == ROWS_SORTED || 5442593348eSBarry Smith op == SYMMETRIC_MATRIX || 5452593348eSBarry Smith op == STRUCTURALLY_SYMMETRIC_MATRIX || 5462593348eSBarry Smith op == YES_NEW_DIAGONALS) 54794a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 5482593348eSBarry Smith else if (op == NO_NEW_DIAGONALS) 549b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 5502593348eSBarry Smith else 551b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 5522593348eSBarry Smith return 0; 5532593348eSBarry Smith } 5542593348eSBarry Smith 5552593348eSBarry Smith 5562593348eSBarry Smith /* -------------------------------------------------------*/ 5572593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 5582593348eSBarry Smith /* -------------------------------------------------------*/ 559b6490206SBarry Smith #include "pinclude/plapack.h" 560b6490206SBarry Smith 561bb42667fSSatish Balay static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 5622593348eSBarry Smith { 563b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 564bb42667fSSatish Balay Scalar *xg,*y,*zg; 565bb42667fSSatish Balay register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 566b6490206SBarry Smith register Scalar x1,x2,x3,x4,x5; 567b6490206SBarry Smith int mbs=a->mbs,m=a->m,i,*idx,*ii; 5689df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,ierr; 5692593348eSBarry Smith 570bb42667fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 571bb42667fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 572b6490206SBarry Smith 573bb42667fSSatish Balay if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */ 574bb42667fSSatish Balay else if (zz!=yy){ 575bb42667fSSatish Balay ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 576bb42667fSSatish Balay PetscMemcpy(z,y,m*sizeof(Scalar)); 5770419e6cdSSatish Balay ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 5782593348eSBarry Smith } 579119a934aSSatish Balay 580119a934aSSatish Balay idx = a->j; 581119a934aSSatish Balay v = a->a; 582119a934aSSatish Balay ii = a->i; 583119a934aSSatish Balay 584119a934aSSatish Balay switch (bs) { 585119a934aSSatish Balay case 1: 586119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 587119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 588119a934aSSatish Balay sum = 0.0; 589119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 590119a934aSSatish Balay z[i] += sum; 591119a934aSSatish Balay } 592119a934aSSatish Balay break; 593119a934aSSatish Balay case 2: 594119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 595119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 596119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 597119a934aSSatish Balay for ( j=0; j<n; j++ ) { 598119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 599119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 600119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 601119a934aSSatish Balay v += 4; 602119a934aSSatish Balay } 603119a934aSSatish Balay z[0] += sum1; z[1] += sum2; 604119a934aSSatish Balay z += 2; 605119a934aSSatish Balay } 606119a934aSSatish Balay break; 607119a934aSSatish Balay case 3: 608119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 609119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 610119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 611119a934aSSatish Balay for ( j=0; j<n; j++ ) { 612119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 613119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 614119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 615119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 616119a934aSSatish Balay v += 9; 617119a934aSSatish Balay } 618119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; 619119a934aSSatish Balay z += 3; 620119a934aSSatish Balay } 621119a934aSSatish Balay break; 622119a934aSSatish Balay case 4: 623119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 624119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 625119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 626119a934aSSatish Balay for ( j=0; j<n; j++ ) { 627119a934aSSatish Balay xb = x + 4*(*idx++); 628119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 629119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 630119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 631119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 632119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 633119a934aSSatish Balay v += 16; 634119a934aSSatish Balay } 635119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; 636119a934aSSatish Balay z += 4; 637119a934aSSatish Balay } 638119a934aSSatish Balay break; 639119a934aSSatish Balay case 5: 640119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 641119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 642119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 643119a934aSSatish Balay for ( j=0; j<n; j++ ) { 644119a934aSSatish Balay xb = x + 5*(*idx++); 645119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 646119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 647119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 648119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 649119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 650119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 651119a934aSSatish Balay v += 25; 652119a934aSSatish Balay } 653119a934aSSatish Balay z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5; 654119a934aSSatish Balay z += 5; 655119a934aSSatish Balay } 656119a934aSSatish Balay break; 657119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 658119a934aSSatish Balay default: { 659119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 660119a934aSSatish Balay if (!a->mult_work) { 661*3b547af2SSatish Balay k = PetscMax(a->m,a->n); 662bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 663119a934aSSatish Balay CHKPTRQ(a->mult_work); 664119a934aSSatish Balay } 665119a934aSSatish Balay work = a->mult_work; 666119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 667119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 668119a934aSSatish Balay ncols = n*bs; 669119a934aSSatish Balay workt = work; 670119a934aSSatish Balay for ( j=0; j<n; j++ ) { 671119a934aSSatish Balay xb = x + bs*(*idx++); 672119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 673119a934aSSatish Balay workt += bs; 674119a934aSSatish Balay } 675119a934aSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 676119a934aSSatish Balay v += n*bs2; 677119a934aSSatish Balay z += bs; 678119a934aSSatish Balay } 679119a934aSSatish Balay } 680119a934aSSatish Balay } 6810419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 6820419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 683bb42667fSSatish Balay return 0; 684bb42667fSSatish Balay } 685bb42667fSSatish Balay 686bb42667fSSatish Balay static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy) 687bb42667fSSatish Balay { 688bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6899df24120SSatish Balay int ierr,bs2=a->bs2; 690bb42667fSSatish Balay 691bb42667fSSatish Balay ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 692bb42667fSSatish Balay PLogFlops(2*bs2*(a->nz)-a->m); 693bb42667fSSatish Balay return 0; 694bb42667fSSatish Balay } 695bb42667fSSatish Balay 696bb42667fSSatish Balay static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 697bb42667fSSatish Balay { 698bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6999df24120SSatish Balay int ierr,bs2=a->bs2; 700bb42667fSSatish Balay 701bb42667fSSatish Balay ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 702119a934aSSatish Balay PLogFlops(2*bs2*(a->nz)); 703119a934aSSatish Balay return 0; 704119a934aSSatish Balay } 705119a934aSSatish Balay 706bb42667fSSatish Balay static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 707119a934aSSatish Balay { 708119a934aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709bb42667fSSatish Balay Scalar *xg,*y,*zg,*zb; 710bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 711119a934aSSatish Balay int mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval; 7129df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 713119a934aSSatish Balay 714119a934aSSatish Balay 715bb42667fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 716bb42667fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 717bb42667fSSatish Balay 718bb42667fSSatish Balay if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */ 719bb42667fSSatish Balay else if (zz!=yy){ 720bb42667fSSatish Balay ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 721bb42667fSSatish Balay PetscMemcpy(z,y,N*sizeof(Scalar)); 7220419e6cdSSatish Balay ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 723bb42667fSSatish Balay } 724119a934aSSatish Balay 725119a934aSSatish Balay idx = a->j; 726119a934aSSatish Balay v = a->a; 727119a934aSSatish Balay ii = a->i; 728119a934aSSatish Balay 729119a934aSSatish Balay switch (bs) { 730119a934aSSatish Balay case 1: 731119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 732119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 733119a934aSSatish Balay xb = x + i; x1 = xb[0]; 734119a934aSSatish Balay ib = idx + ai[i]; 735119a934aSSatish Balay for ( j=0; j<n; j++ ) { 736119a934aSSatish Balay z[ib[j]] += *v++ * x1; 737119a934aSSatish Balay } 738119a934aSSatish Balay } 739119a934aSSatish Balay break; 740119a934aSSatish Balay case 2: 741119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 742119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 743119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 744119a934aSSatish Balay ib = idx + ai[i]; 745119a934aSSatish Balay for ( j=0; j<n; j++ ) { 746119a934aSSatish Balay rval = ib[j]*2; 747119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 748119a934aSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 749119a934aSSatish Balay v += 4; 750119a934aSSatish Balay } 751119a934aSSatish Balay } 752119a934aSSatish Balay break; 753119a934aSSatish Balay case 3: 754119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 755119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 756119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 757119a934aSSatish Balay ib = idx + ai[i]; 758119a934aSSatish Balay for ( j=0; j<n; j++ ) { 759119a934aSSatish Balay rval = ib[j]*3; 760119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 761119a934aSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 762119a934aSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 763119a934aSSatish Balay v += 9; 764119a934aSSatish Balay } 765119a934aSSatish Balay } 766119a934aSSatish Balay break; 767119a934aSSatish Balay case 4: 768119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 769119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 770119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 771119a934aSSatish Balay ib = idx + ai[i]; 772119a934aSSatish Balay for ( j=0; j<n; j++ ) { 773119a934aSSatish Balay rval = ib[j]*4; 774119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 775119a934aSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 776119a934aSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 777119a934aSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 778119a934aSSatish Balay v += 16; 779119a934aSSatish Balay } 780119a934aSSatish Balay } 781119a934aSSatish Balay break; 782119a934aSSatish Balay case 5: 783119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 784119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 785119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 786119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 787119a934aSSatish Balay ib = idx + ai[i]; 788119a934aSSatish Balay for ( j=0; j<n; j++ ) { 789119a934aSSatish Balay rval = ib[j]*5; 790119a934aSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 791119a934aSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 792119a934aSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 793119a934aSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 794119a934aSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 795119a934aSSatish Balay v += 25; 796119a934aSSatish Balay } 797119a934aSSatish Balay } 798119a934aSSatish Balay break; 799119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 800119a934aSSatish Balay default: { 801119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 802119a934aSSatish Balay if (!a->mult_work) { 803*3b547af2SSatish Balay k = PetscMax(a->m,a->n); 804bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 805119a934aSSatish Balay CHKPTRQ(a->mult_work); 806119a934aSSatish Balay } 807119a934aSSatish Balay work = a->mult_work; 808119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 809119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 810119a934aSSatish Balay ncols = n*bs; 811119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 812119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 813119a934aSSatish Balay v += n*bs2; 814119a934aSSatish Balay x += bs; 815119a934aSSatish Balay workt = work; 816119a934aSSatish Balay for ( j=0; j<n; j++ ) { 817119a934aSSatish Balay zb = z + bs*(*idx++); 818119a934aSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 819119a934aSSatish Balay workt += bs; 820119a934aSSatish Balay } 821119a934aSSatish Balay } 822119a934aSSatish Balay } 823119a934aSSatish Balay } 8240419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 8250419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 8262593348eSBarry Smith return 0; 8272593348eSBarry Smith } 8282593348eSBarry Smith 829bb42667fSSatish Balay static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy) 830bb42667fSSatish Balay { 831bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8329df24120SSatish Balay int ierr,bs2=a->bs2; 833bb42667fSSatish Balay 834bb42667fSSatish Balay ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 835bb42667fSSatish Balay PLogFlops(2*bs2*(a->nz)-a->n); 836bb42667fSSatish Balay return 0; 837bb42667fSSatish Balay } 838bb42667fSSatish Balay 839bb42667fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 840bb42667fSSatish Balay { 841bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8429df24120SSatish Balay int ierr,bs2=a->bs2; 843bb42667fSSatish Balay 844bb42667fSSatish Balay ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 845bb42667fSSatish Balay PLogFlops(2*bs2*(a->nz)); 846bb42667fSSatish Balay return 0; 847bb42667fSSatish Balay } 848bb42667fSSatish Balay 849de6a44a3SBarry Smith static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 8502593348eSBarry Smith { 851b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8529df24120SSatish Balay if (nz) *nz = a->bs2*a->nz; 853bcd2baecSBarry Smith if (nza) *nza = a->maxnz; 854bcd2baecSBarry Smith if (mem) *mem = (int)A->mem; 8552593348eSBarry Smith return 0; 8562593348eSBarry Smith } 8572593348eSBarry Smith 85891d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 85991d316f6SSatish Balay { 86091d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 86191d316f6SSatish Balay 86291d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 86391d316f6SSatish Balay 86491d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 86591d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 866a7c10996SSatish Balay (a->nz != b->nz)) { 86791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 86891d316f6SSatish Balay } 86991d316f6SSatish Balay 87091d316f6SSatish Balay /* if the a->i are the same */ 87191d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 87291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 87391d316f6SSatish Balay } 87491d316f6SSatish Balay 87591d316f6SSatish Balay /* if a->j are the same */ 87691d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 87791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 87891d316f6SSatish Balay } 87991d316f6SSatish Balay 88091d316f6SSatish Balay /* if a->a are the same */ 88191d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 88291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 88391d316f6SSatish Balay } 88491d316f6SSatish Balay *flg = PETSC_TRUE; 88591d316f6SSatish Balay return 0; 88691d316f6SSatish Balay 88791d316f6SSatish Balay } 88891d316f6SSatish Balay 88991d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 89091d316f6SSatish Balay { 89191d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8927e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 89317e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 89417e48fc4SSatish Balay 89517e48fc4SSatish Balay bs = a->bs; 89617e48fc4SSatish Balay aa = a->a; 89717e48fc4SSatish Balay ai = a->i; 89817e48fc4SSatish Balay aj = a->j; 89917e48fc4SSatish Balay ambs = a->mbs; 9009df24120SSatish Balay bs2 = a->bs2; 90191d316f6SSatish Balay 90291d316f6SSatish Balay VecSet(&zero,v); 90391d316f6SSatish Balay VecGetArray(v,&x); VecGetLocalSize(v,&n); 9049867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 90517e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 90617e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 90717e48fc4SSatish Balay if (aj[j] == i) { 90817e48fc4SSatish Balay row = i*bs; 9097e67e3f9SSatish Balay aa_j = aa+j*bs2; 9107e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 91191d316f6SSatish Balay break; 91291d316f6SSatish Balay } 91391d316f6SSatish Balay } 91491d316f6SSatish Balay } 91591d316f6SSatish Balay return 0; 91691d316f6SSatish Balay } 91791d316f6SSatish Balay 9189867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 9199867e207SSatish Balay { 9209867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 9219867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 9227e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 9239867e207SSatish Balay 9249867e207SSatish Balay ai = a->i; 9259867e207SSatish Balay aj = a->j; 9269867e207SSatish Balay aa = a->a; 9279867e207SSatish Balay m = a->m; 9289867e207SSatish Balay n = a->n; 9299867e207SSatish Balay bs = a->bs; 9309867e207SSatish Balay mbs = a->mbs; 9319df24120SSatish Balay bs2 = a->bs2; 9329867e207SSatish Balay if (ll) { 9339867e207SSatish Balay VecGetArray(ll,&l); VecGetSize(ll,&lm); 9349867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 9359867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 9369867e207SSatish Balay M = ai[i+1] - ai[i]; 9379867e207SSatish Balay li = l + i*bs; 9387e67e3f9SSatish Balay v = aa + bs2*ai[i]; 9399867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 9407e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 9419867e207SSatish Balay (*v++) *= li[k%bs]; 9429867e207SSatish Balay } 9439867e207SSatish Balay } 9449867e207SSatish Balay } 9459867e207SSatish Balay } 9469867e207SSatish Balay 9479867e207SSatish Balay if (rr) { 9489867e207SSatish Balay VecGetArray(rr,&r); VecGetSize(rr,&rn); 9499867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 9509867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 9519867e207SSatish Balay M = ai[i+1] - ai[i]; 9527e67e3f9SSatish Balay v = aa + bs2*ai[i]; 9539867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 9549867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 9559867e207SSatish Balay for ( k=0; k<bs; k++ ) { 9569867e207SSatish Balay x = ri[k]; 9579867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 9589867e207SSatish Balay } 9599867e207SSatish Balay } 9609867e207SSatish Balay } 9619867e207SSatish Balay } 9629867e207SSatish Balay return 0; 9639867e207SSatish Balay } 9649867e207SSatish Balay 9659867e207SSatish Balay 966b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 967b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 968b6490206SBarry Smith extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 969a7c10996SSatish Balay extern int MatSolveAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 9707fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 9717fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 9727fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 9737fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 9747fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 9757fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 9762593348eSBarry Smith 977b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 9782593348eSBarry Smith { 979b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 9802593348eSBarry Smith Scalar *v = a->a; 9812593348eSBarry Smith double sum = 0.0; 9829df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 9832593348eSBarry Smith 9842593348eSBarry Smith if (type == NORM_FROBENIUS) { 9850419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 9862593348eSBarry Smith #if defined(PETSC_COMPLEX) 9872593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 9882593348eSBarry Smith #else 9892593348eSBarry Smith sum += (*v)*(*v); v++; 9902593348eSBarry Smith #endif 9912593348eSBarry Smith } 9922593348eSBarry Smith *norm = sqrt(sum); 9932593348eSBarry Smith } 9942593348eSBarry Smith else { 995b6490206SBarry Smith SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 9962593348eSBarry Smith } 9972593348eSBarry Smith return 0; 9982593348eSBarry Smith } 9992593348eSBarry Smith 10002593348eSBarry Smith /* 10012593348eSBarry Smith note: This can only work for identity for row and col. It would 10022593348eSBarry Smith be good to check this and otherwise generate an error. 10032593348eSBarry Smith */ 1004b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 10052593348eSBarry Smith { 1006b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 10072593348eSBarry Smith Mat outA; 1008de6a44a3SBarry Smith int ierr; 10092593348eSBarry Smith 1010b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 10112593348eSBarry Smith 10122593348eSBarry Smith outA = inA; 10132593348eSBarry Smith inA->factor = FACTOR_LU; 10142593348eSBarry Smith a->row = row; 10152593348eSBarry Smith a->col = col; 10162593348eSBarry Smith 1017de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 10182593348eSBarry Smith 10192593348eSBarry Smith if (!a->diag) { 1020b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 10212593348eSBarry Smith } 10227fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 10232593348eSBarry Smith return 0; 10242593348eSBarry Smith } 10252593348eSBarry Smith 1026b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 10272593348eSBarry Smith { 1028b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 10299df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1030b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1031b6490206SBarry Smith PLogFlops(totalnz); 10322593348eSBarry Smith return 0; 10332593348eSBarry Smith } 10342593348eSBarry Smith 10357e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 10367e67e3f9SSatish Balay { 10377e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 10387e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1039a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 10409df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 10417e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 10427e67e3f9SSatish Balay 10437e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 10447e67e3f9SSatish Balay row = im[k]; brow = row/bs; 10457e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 10467e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1047a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 10487e67e3f9SSatish Balay nrow = ailen[brow]; 10497e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 10507e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 10517e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1052a7c10996SSatish Balay col = in[l] ; 10537e67e3f9SSatish Balay bcol = col/bs; 10547e67e3f9SSatish Balay cidx = col%bs; 10557e67e3f9SSatish Balay ridx = row%bs; 10567e67e3f9SSatish Balay high = nrow; 10577e67e3f9SSatish Balay low = 0; /* assume unsorted */ 10587e67e3f9SSatish Balay while (high-low > 5) { 10597e67e3f9SSatish Balay t = (low+high)/2; 10607e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 10617e67e3f9SSatish Balay else low = t; 10627e67e3f9SSatish Balay } 10637e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 10647e67e3f9SSatish Balay if (rp[i] > bcol) break; 10657e67e3f9SSatish Balay if (rp[i] == bcol) { 10667e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 10677e67e3f9SSatish Balay goto finished; 10687e67e3f9SSatish Balay } 10697e67e3f9SSatish Balay } 10707e67e3f9SSatish Balay *v++ = zero; 10717e67e3f9SSatish Balay finished:; 10727e67e3f9SSatish Balay } 10737e67e3f9SSatish Balay } 10747e67e3f9SSatish Balay return 0; 10757e67e3f9SSatish Balay } 10767e67e3f9SSatish Balay 1077b6490206SBarry Smith int MatPrintHelp_SeqBAIJ(Mat A) 10782593348eSBarry Smith { 10792593348eSBarry Smith static int called = 0; 10802593348eSBarry Smith 10812593348eSBarry Smith if (called) return 0; else called = 1; 10822593348eSBarry Smith return 0; 10832593348eSBarry Smith } 10842593348eSBarry Smith /* -------------------------------------------------------------------*/ 1085cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 10869867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1087119a934aSSatish Balay MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ, 1088bb42667fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1089a7c10996SSatish Balay MatSolve_SeqBAIJ,MatSolveAdd_SeqBAIJ, 1090b6490206SBarry Smith 0,0, 1091de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1092b6490206SBarry Smith 0, 1093f2501298SSatish Balay MatTranspose_SeqBAIJ, 109417e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 10959867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1096584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1097b6490206SBarry Smith 0, 1098b6490206SBarry Smith MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1099b6490206SBarry Smith MatGetReordering_SeqBAIJ, 11007fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1101b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1102de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1103b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 1104b6490206SBarry Smith 0,0, 1105b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1106b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1107b6490206SBarry Smith 0,0, 11087e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 1109b6490206SBarry Smith MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1110b6490206SBarry Smith 0}; 11112593348eSBarry Smith 11122593348eSBarry Smith /*@C 111344cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 111444cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 111544cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 11162593348eSBarry Smith (or nzz). By setting these parameters accurately, performance can be 11172593348eSBarry Smith increased by more than a factor of 50. 11182593348eSBarry Smith 11192593348eSBarry Smith Input Parameters: 11202593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1121b6490206SBarry Smith . bs - size of block 11222593348eSBarry Smith . m - number of rows 11232593348eSBarry Smith . n - number of columns 1124b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1125b6490206SBarry Smith . nzz - number of block nonzeros per block row or PETSC_NULL 1126b6490206SBarry Smith (possibly different for each row) 11272593348eSBarry Smith 11282593348eSBarry Smith Output Parameter: 11292593348eSBarry Smith . A - the matrix 11302593348eSBarry Smith 11312593348eSBarry Smith Notes: 113244cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 11332593348eSBarry Smith storage. That is, the stored row and column indices can begin at 113444cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 11352593348eSBarry Smith 11362593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 11372593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 11382593348eSBarry Smith allocation. For additional details, see the users manual chapter on 11392593348eSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 11402593348eSBarry Smith 114144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 11422593348eSBarry Smith @*/ 1143b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 11442593348eSBarry Smith { 11452593348eSBarry Smith Mat B; 1146b6490206SBarry Smith Mat_SeqBAIJ *b; 1147f2501298SSatish Balay int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1148b6490206SBarry Smith 1149f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1150f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 11512593348eSBarry Smith 11522593348eSBarry Smith *A = 0; 1153b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 11542593348eSBarry Smith PLogObjectCreate(B); 1155b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 115644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 11572593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 11587fc0212eSBarry Smith switch (bs) { 11597fc0212eSBarry Smith case 1: 11607fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 11617fc0212eSBarry Smith break; 11624eeb42bcSBarry Smith case 2: 11634eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 11646d84be18SBarry Smith break; 1165f327dd97SBarry Smith case 3: 1166f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 11674eeb42bcSBarry Smith break; 11686d84be18SBarry Smith case 4: 11696d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 11706d84be18SBarry Smith break; 11716d84be18SBarry Smith case 5: 11726d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 11736d84be18SBarry Smith break; 11747fc0212eSBarry Smith } 1175b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1176b6490206SBarry Smith B->view = MatView_SeqBAIJ; 11772593348eSBarry Smith B->factor = 0; 11782593348eSBarry Smith B->lupivotthreshold = 1.0; 11792593348eSBarry Smith b->row = 0; 11802593348eSBarry Smith b->col = 0; 11812593348eSBarry Smith b->reallocs = 0; 11822593348eSBarry Smith 118344cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 118444cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1185b6490206SBarry Smith b->mbs = mbs; 1186f2501298SSatish Balay b->nbs = nbs; 1187b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 11882593348eSBarry Smith if (nnz == PETSC_NULL) { 1189119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 11902593348eSBarry Smith else if (nz <= 0) nz = 1; 1191b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1192b6490206SBarry Smith nz = nz*mbs; 11932593348eSBarry Smith } 11942593348eSBarry Smith else { 11952593348eSBarry Smith nz = 0; 1196b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 11972593348eSBarry Smith } 11982593348eSBarry Smith 11992593348eSBarry Smith /* allocate the matrix space */ 12007e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 12012593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 12027e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 12037e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 12042593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 12052593348eSBarry Smith b->i = b->j + nz; 12062593348eSBarry Smith b->singlemalloc = 1; 12072593348eSBarry Smith 1208de6a44a3SBarry Smith b->i[0] = 0; 1209b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 12102593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 12112593348eSBarry Smith } 12122593348eSBarry Smith 1213b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1214b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1215b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1216b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 12172593348eSBarry Smith 1218b6490206SBarry Smith b->bs = bs; 12199df24120SSatish Balay b->bs2 = bs2; 1220b6490206SBarry Smith b->mbs = mbs; 12212593348eSBarry Smith b->nz = 0; 12222593348eSBarry Smith b->maxnz = nz; 12232593348eSBarry Smith b->sorted = 0; 12242593348eSBarry Smith b->roworiented = 1; 12252593348eSBarry Smith b->nonew = 0; 12262593348eSBarry Smith b->diag = 0; 12272593348eSBarry Smith b->solve_work = 0; 1228de6a44a3SBarry Smith b->mult_work = 0; 12292593348eSBarry Smith b->spptr = 0; 12302593348eSBarry Smith *A = B; 12312593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 12322593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 12332593348eSBarry Smith return 0; 12342593348eSBarry Smith } 12352593348eSBarry Smith 1236b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 12372593348eSBarry Smith { 12382593348eSBarry Smith Mat C; 1239b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 12409df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1241de6a44a3SBarry Smith 1242de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 12432593348eSBarry Smith 12442593348eSBarry Smith *B = 0; 1245b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 12462593348eSBarry Smith PLogObjectCreate(C); 1247b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 12482593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1249b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1250b6490206SBarry Smith C->view = MatView_SeqBAIJ; 12512593348eSBarry Smith C->factor = A->factor; 12522593348eSBarry Smith c->row = 0; 12532593348eSBarry Smith c->col = 0; 12542593348eSBarry Smith C->assembled = PETSC_TRUE; 12552593348eSBarry Smith 125644cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 125744cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 125844cd7ae7SLois Curfman McInnes C->M = a->m; 125944cd7ae7SLois Curfman McInnes C->N = a->n; 126044cd7ae7SLois Curfman McInnes 1261b6490206SBarry Smith c->bs = a->bs; 12629df24120SSatish Balay c->bs2 = a->bs2; 1263b6490206SBarry Smith c->mbs = a->mbs; 1264b6490206SBarry Smith c->nbs = a->nbs; 12652593348eSBarry Smith 1266b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1267b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1268b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 12692593348eSBarry Smith c->imax[i] = a->imax[i]; 12702593348eSBarry Smith c->ilen[i] = a->ilen[i]; 12712593348eSBarry Smith } 12722593348eSBarry Smith 12732593348eSBarry Smith /* allocate the matrix space */ 12742593348eSBarry Smith c->singlemalloc = 1; 12757e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 12762593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 12777e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1278de6a44a3SBarry Smith c->i = c->j + nz; 1279b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1280b6490206SBarry Smith if (mbs > 0) { 1281de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 12822593348eSBarry Smith if (cpvalues == COPY_VALUES) { 12837e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 12842593348eSBarry Smith } 12852593348eSBarry Smith } 12862593348eSBarry Smith 1287b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 12882593348eSBarry Smith c->sorted = a->sorted; 12892593348eSBarry Smith c->roworiented = a->roworiented; 12902593348eSBarry Smith c->nonew = a->nonew; 12912593348eSBarry Smith 12922593348eSBarry Smith if (a->diag) { 1293b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1294b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1295b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 12962593348eSBarry Smith c->diag[i] = a->diag[i]; 12972593348eSBarry Smith } 12982593348eSBarry Smith } 12992593348eSBarry Smith else c->diag = 0; 13002593348eSBarry Smith c->nz = a->nz; 13012593348eSBarry Smith c->maxnz = a->maxnz; 13022593348eSBarry Smith c->solve_work = 0; 13032593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 13047fc0212eSBarry Smith c->mult_work = 0; 13052593348eSBarry Smith *B = C; 13062593348eSBarry Smith return 0; 13072593348eSBarry Smith } 13082593348eSBarry Smith 130919bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 13102593348eSBarry Smith { 1311b6490206SBarry Smith Mat_SeqBAIJ *a; 13122593348eSBarry Smith Mat B; 1313de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1314b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 131535aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1316a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 1317b6490206SBarry Smith Scalar *aa; 131819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 13192593348eSBarry Smith 132035aab85fSBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1321de6a44a3SBarry Smith bs2 = bs*bs; 1322b6490206SBarry Smith 13232593348eSBarry Smith MPI_Comm_size(comm,&size); 1324b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 132590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 132677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1327de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 13282593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 13292593348eSBarry Smith 1330b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 133135aab85fSBarry Smith 133235aab85fSBarry Smith /* 133335aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 133435aab85fSBarry Smith divisible by the blocksize 133535aab85fSBarry Smith */ 1336b6490206SBarry Smith mbs = M/bs; 133735aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 133835aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 133935aab85fSBarry Smith else mbs++; 134035aab85fSBarry Smith if (extra_rows) { 134135aab85fSBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 134235aab85fSBarry Smith } 1343b6490206SBarry Smith 13442593348eSBarry Smith /* read in row lengths */ 134535aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 134677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 134735aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 13482593348eSBarry Smith 1349b6490206SBarry Smith /* read in column indices */ 135035aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 135177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 135235aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1353b6490206SBarry Smith 1354b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1355b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1356b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 135735aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 135835aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 135935aab85fSBarry Smith masked = mask + mbs; 1360b6490206SBarry Smith rowcount = 0; nzcount = 0; 1361b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 136235aab85fSBarry Smith nmask = 0; 1363b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1364b6490206SBarry Smith kmax = rowlengths[rowcount]; 1365b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 136635aab85fSBarry Smith tmp = jj[nzcount++]/bs; 136735aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1368b6490206SBarry Smith } 1369b6490206SBarry Smith rowcount++; 1370b6490206SBarry Smith } 137135aab85fSBarry Smith browlengths[i] += nmask; 137235aab85fSBarry Smith /* zero out the mask elements we set */ 137335aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1374b6490206SBarry Smith } 1375b6490206SBarry Smith 13762593348eSBarry Smith /* create our matrix */ 137735aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 137835aab85fSBarry Smith CHKERRQ(ierr); 13792593348eSBarry Smith B = *A; 1380b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 13812593348eSBarry Smith 13822593348eSBarry Smith /* set matrix "i" values */ 1383de6a44a3SBarry Smith a->i[0] = 0; 1384b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1385b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1386b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 13872593348eSBarry Smith } 1388b6490206SBarry Smith a->nz = 0; 1389b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 13902593348eSBarry Smith 1391b6490206SBarry Smith /* read in nonzero values */ 139235aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 139377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 139435aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1395b6490206SBarry Smith 1396b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1397b6490206SBarry Smith nzcount = 0; jcount = 0; 1398b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 1399b6490206SBarry Smith nzcountb = nzcount; 140035aab85fSBarry Smith nmask = 0; 1401b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1402b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1403b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 140435aab85fSBarry Smith tmp = jj[nzcount++]/bs; 140535aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1406b6490206SBarry Smith } 1407b6490206SBarry Smith rowcount++; 1408b6490206SBarry Smith } 1409de6a44a3SBarry Smith /* sort the masked values */ 141077c4ece6SBarry Smith PetscSortInt(nmask,masked); 1411de6a44a3SBarry Smith 1412b6490206SBarry Smith /* set "j" values into matrix */ 1413b6490206SBarry Smith maskcount = 1; 141435aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 141535aab85fSBarry Smith a->j[jcount++] = masked[j]; 1416de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1417b6490206SBarry Smith } 1418b6490206SBarry Smith /* set "a" values into matrix */ 1419de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1420b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1421b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1422b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 1423de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1424de6a44a3SBarry Smith block = mask[tmp] - 1; 1425de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1426de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1427b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 1428b6490206SBarry Smith } 1429b6490206SBarry Smith } 143035aab85fSBarry Smith /* zero out the mask elements we set */ 143135aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1432b6490206SBarry Smith } 143335aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1434b6490206SBarry Smith 1435b6490206SBarry Smith PetscFree(rowlengths); 1436b6490206SBarry Smith PetscFree(browlengths); 1437b6490206SBarry Smith PetscFree(aa); 1438b6490206SBarry Smith PetscFree(jj); 1439b6490206SBarry Smith PetscFree(mask); 1440b6490206SBarry Smith 1441b6490206SBarry Smith B->assembled = PETSC_TRUE; 1442de6a44a3SBarry Smith 1443de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1444de6a44a3SBarry Smith if (flg) { 144519bcc07fSBarry Smith Viewer tviewer; 144619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 144790ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 144819bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 144919bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1450de6a44a3SBarry Smith } 1451de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1452de6a44a3SBarry Smith if (flg) { 145319bcc07fSBarry Smith Viewer tviewer; 145419bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 145590ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 145619bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 145719bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1458de6a44a3SBarry Smith } 1459de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1460de6a44a3SBarry Smith if (flg) { 146119bcc07fSBarry Smith Viewer tviewer; 146219bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 146319bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 146419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1465de6a44a3SBarry Smith } 1466de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1467de6a44a3SBarry Smith if (flg) { 146819bcc07fSBarry Smith Viewer tviewer; 146919bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 147090ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 147119bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 147219bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1473de6a44a3SBarry Smith } 1474de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1475de6a44a3SBarry Smith if (flg) { 147619bcc07fSBarry Smith Viewer tviewer; 147719bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 147819bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 147919bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 148019bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1481de6a44a3SBarry Smith } 14822593348eSBarry Smith return 0; 14832593348eSBarry Smith } 14842593348eSBarry Smith 14852593348eSBarry Smith 14862593348eSBarry Smith 1487