12593348eSBarry Smith #ifndef lint 2*4e220ebcSLois Curfman McInnes static char vcid[] = "$Id: baij.c,v 1.66 1996/08/15 12:48:03 bsmith Exp curfman $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 101a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 111a6a6d98SBarry Smith #include "src/inline/spops.h" 122593348eSBarry Smith #include "petsc.h" 133b547af2SSatish Balay 14bcd2baecSBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 152593348eSBarry Smith 16a2ce50c7SBarry Smith static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 172593348eSBarry Smith { 18b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 19bcd2baecSBarry Smith int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 202593348eSBarry Smith 212593348eSBarry Smith /* 222593348eSBarry Smith this is tacky: In the future when we have written special factorization 232593348eSBarry Smith and solve routines for the identity permutation we should use a 242593348eSBarry Smith stride index set instead of the general one. 252593348eSBarry Smith */ 262593348eSBarry Smith if (type == ORDER_NATURAL) { 272593348eSBarry Smith idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 282593348eSBarry Smith for ( i=0; i<n; i++ ) idx[i] = i; 29537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 30537820f0SBarry Smith ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 312593348eSBarry Smith PetscFree(idx); 322593348eSBarry Smith ISSetPermutation(*rperm); 332593348eSBarry Smith ISSetPermutation(*cperm); 342593348eSBarry Smith ISSetIdentity(*rperm); 352593348eSBarry Smith ISSetIdentity(*cperm); 362593348eSBarry Smith return 0; 372593348eSBarry Smith } 382593348eSBarry Smith 39bcd2baecSBarry Smith MatReorderingRegisterAll(); 40a7c10996SSatish Balay ishift = 0; 41bcd2baecSBarry Smith oshift = -MatReorderingIndexShift[(int)type]; 42bcd2baecSBarry Smith if (MatReorderingRequiresSymmetric[(int)type]) { 431a6a6d98SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 441a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 452593348eSBarry Smith PetscFree(ia); PetscFree(ja); 46bcd2baecSBarry Smith } else { 47bcd2baecSBarry Smith if (ishift == oshift) { 481a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 49bcd2baecSBarry Smith } 50bcd2baecSBarry Smith else if (ishift == -1) { 51bcd2baecSBarry Smith /* temporarily subtract 1 from i and j indices */ 521a6a6d98SBarry Smith int nz = a->i[n] - 1; 53bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 541a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 551a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 56bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 571a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 58bcd2baecSBarry Smith } else { 59bcd2baecSBarry Smith /* temporarily add 1 to i and j indices */ 601a6a6d98SBarry Smith int nz = a->i[n] - 1; 61bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 621a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 631a6a6d98SBarry Smith ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 64bcd2baecSBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 651a6a6d98SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 66bcd2baecSBarry Smith } 67bcd2baecSBarry Smith } 682593348eSBarry Smith return 0; 692593348eSBarry Smith } 702593348eSBarry Smith 71de6a44a3SBarry Smith /* 72de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 73de6a44a3SBarry Smith */ 74de6a44a3SBarry Smith 75de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 76de6a44a3SBarry Smith { 77de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 787fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 79de6a44a3SBarry Smith 80de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 81de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 827fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 83de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 84de6a44a3SBarry Smith if (a->j[j] == i) { 85de6a44a3SBarry Smith diag[i] = j; 86de6a44a3SBarry Smith break; 87de6a44a3SBarry Smith } 88de6a44a3SBarry Smith } 89de6a44a3SBarry Smith } 90de6a44a3SBarry Smith a->diag = diag; 91de6a44a3SBarry Smith return 0; 92de6a44a3SBarry Smith } 932593348eSBarry Smith 942593348eSBarry Smith #include "draw.h" 952593348eSBarry Smith #include "pinclude/pviewer.h" 9677c4ece6SBarry Smith #include "sys.h" 972593348eSBarry Smith 98b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 992593348eSBarry Smith { 100b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1019df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102b6490206SBarry Smith Scalar *aa; 1032593348eSBarry Smith 10490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1052593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1062593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1072593348eSBarry Smith col_lens[1] = a->m; 1082593348eSBarry Smith col_lens[2] = a->n; 1097e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1102593348eSBarry Smith 1112593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 112b6490206SBarry Smith count = 0; 113b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 114b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 115b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 116b6490206SBarry Smith } 1172593348eSBarry Smith } 11877c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1192593348eSBarry Smith PetscFree(col_lens); 1202593348eSBarry Smith 1212593348eSBarry Smith /* store column indices (zero start index) */ 1227e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 123b6490206SBarry Smith count = 0; 124b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 125b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 126b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 127b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 128b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1292593348eSBarry Smith } 1302593348eSBarry Smith } 131b6490206SBarry Smith } 132b6490206SBarry Smith } 1337e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 134b6490206SBarry Smith PetscFree(jj); 1352593348eSBarry Smith 1362593348eSBarry Smith /* store nonzero values */ 1377e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 138b6490206SBarry Smith count = 0; 139b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 140b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 141b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 142b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1437e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 144b6490206SBarry Smith } 145b6490206SBarry Smith } 146b6490206SBarry Smith } 147b6490206SBarry Smith } 1487e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 149b6490206SBarry Smith PetscFree(aa); 1502593348eSBarry Smith return 0; 1512593348eSBarry Smith } 1522593348eSBarry Smith 153b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1542593348eSBarry Smith { 155b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1569df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1572593348eSBarry Smith FILE *fd; 1582593348eSBarry Smith char *outputname; 1592593348eSBarry Smith 16090ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1612593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16290ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 1637ddc982cSLois Curfman McInnes if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 1647ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1652593348eSBarry Smith } 16690ace30eSBarry Smith else if (format == ASCII_FORMAT_MATLAB) { 167b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1682593348eSBarry Smith } 16944cd7ae7SLois Curfman McInnes else if (format == ASCII_FORMAT_COMMON) { 17044cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17144cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17244cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17344cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17444cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17544cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17644cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17744cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17844cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 17944cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18044cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18144cd7ae7SLois Curfman McInnes #else 18244cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18344cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18444cd7ae7SLois Curfman McInnes #endif 18544cd7ae7SLois Curfman McInnes } 18644cd7ae7SLois Curfman McInnes } 18744cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes } 19044cd7ae7SLois Curfman McInnes } 1912593348eSBarry Smith else { 192b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 193b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 194b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 195b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 196b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19788685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1987e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 19988685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2007e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20188685aaeSLois Curfman McInnes } 20288685aaeSLois Curfman McInnes else { 2037e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20488685aaeSLois Curfman McInnes } 20588685aaeSLois Curfman McInnes #else 2067e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20788685aaeSLois Curfman McInnes #endif 2082593348eSBarry Smith } 2092593348eSBarry Smith } 2102593348eSBarry Smith fprintf(fd,"\n"); 2112593348eSBarry Smith } 2122593348eSBarry Smith } 213b6490206SBarry Smith } 2142593348eSBarry Smith fflush(fd); 2152593348eSBarry Smith return 0; 2162593348eSBarry Smith } 2172593348eSBarry Smith 2183270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2193270192aSSatish Balay { 2203270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2213270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2223270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2233270192aSSatish Balay Scalar *aa; 2243270192aSSatish Balay Draw draw; 2253270192aSSatish Balay DrawButton button; 2263270192aSSatish Balay PetscTruth isnull; 2273270192aSSatish Balay 2283270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2293270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2303270192aSSatish Balay 2313270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2323270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2333270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2343270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2353270192aSSatish Balay color = DRAW_BLUE; 2363270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2373270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2383270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2393270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2403270192aSSatish Balay aa = a->a + j*bs2; 2413270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2423270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2433270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 2443270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2453270192aSSatish Balay } 2463270192aSSatish Balay } 2473270192aSSatish Balay } 2483270192aSSatish Balay } 2493270192aSSatish Balay color = DRAW_CYAN; 2503270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2513270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2523270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2533270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2543270192aSSatish Balay aa = a->a + j*bs2; 2553270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2563270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2573270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 2583270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2593270192aSSatish Balay } 2603270192aSSatish Balay } 2613270192aSSatish Balay } 2623270192aSSatish Balay } 2633270192aSSatish Balay 2643270192aSSatish Balay color = DRAW_RED; 2653270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2663270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2673270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2683270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2693270192aSSatish Balay aa = a->a + j*bs2; 2703270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2713270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2723270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 2733270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2743270192aSSatish Balay } 2753270192aSSatish Balay } 2763270192aSSatish Balay } 2773270192aSSatish Balay } 2783270192aSSatish Balay 2793270192aSSatish Balay DrawFlush(draw); 2803270192aSSatish Balay DrawGetPause(draw,&pause); 2813270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2823270192aSSatish Balay 2833270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2843270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2853270192aSSatish Balay while (button != BUTTON_RIGHT) { 2863270192aSSatish Balay DrawClear(draw); 2873270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2883270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2893270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2903270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2913270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 2923270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 2933270192aSSatish Balay w *= scale; h *= scale; 2943270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2953270192aSSatish Balay color = DRAW_BLUE; 2963270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2973270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2983270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2993270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3003270192aSSatish Balay aa = a->a + j*bs2; 3013270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3023270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3033270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 3043270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3053270192aSSatish Balay } 3063270192aSSatish Balay } 3073270192aSSatish Balay } 3083270192aSSatish Balay } 3093270192aSSatish Balay color = DRAW_CYAN; 3103270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3113270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3123270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3133270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3143270192aSSatish Balay aa = a->a + j*bs2; 3153270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3163270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3173270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 3183270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3193270192aSSatish Balay } 3203270192aSSatish Balay } 3213270192aSSatish Balay } 3223270192aSSatish Balay } 3233270192aSSatish Balay 3243270192aSSatish Balay color = DRAW_RED; 3253270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3263270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3273270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3283270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3293270192aSSatish Balay aa = a->a + j*bs2; 3303270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3313270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3323270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 3333270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3343270192aSSatish Balay } 3353270192aSSatish Balay } 3363270192aSSatish Balay } 3373270192aSSatish Balay } 3383270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3393270192aSSatish Balay } 3403270192aSSatish Balay return 0; 3413270192aSSatish Balay } 3423270192aSSatish Balay 343b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3442593348eSBarry Smith { 3452593348eSBarry Smith Mat A = (Mat) obj; 34619bcc07fSBarry Smith ViewerType vtype; 34719bcc07fSBarry Smith int ierr; 3482593348eSBarry Smith 34919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 35019bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 351b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 3522593348eSBarry Smith } 35319bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 354b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3552593348eSBarry Smith } 35619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 357b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3582593348eSBarry Smith } 35919bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3603270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3612593348eSBarry Smith } 3622593348eSBarry Smith return 0; 3632593348eSBarry Smith } 364b6490206SBarry Smith 365119a934aSSatish Balay #define CHUNKSIZE 10 366cd0e1443SSatish Balay 367cd0e1443SSatish Balay /* This version has row oriented v */ 368cd0e1443SSatish Balay static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 369cd0e1443SSatish Balay { 370cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 371cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 372cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 373a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3749df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 375cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 376cd0e1443SSatish Balay 377cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 378cd0e1443SSatish Balay row = im[k]; brow = row/bs; 379cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 380cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 381a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 382cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 383cd0e1443SSatish Balay low = 0; 384cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 385cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 386cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 387a7c10996SSatish Balay col = in[l]; bcol = col/bs; 388cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 389cd0e1443SSatish Balay if (roworiented) { 390cd0e1443SSatish Balay value = *v++; 391cd0e1443SSatish Balay } 392cd0e1443SSatish Balay else { 393cd0e1443SSatish Balay value = v[k + l*m]; 394cd0e1443SSatish Balay } 395cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 396cd0e1443SSatish Balay while (high-low > 5) { 397cd0e1443SSatish Balay t = (low+high)/2; 398cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 399cd0e1443SSatish Balay else low = t; 400cd0e1443SSatish Balay } 401cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 402cd0e1443SSatish Balay if (rp[i] > bcol) break; 403cd0e1443SSatish Balay if (rp[i] == bcol) { 4047e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 405cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 406cd0e1443SSatish Balay else *bap = value; 407cd0e1443SSatish Balay goto noinsert; 408cd0e1443SSatish Balay } 409cd0e1443SSatish Balay } 410cd0e1443SSatish Balay if (nonew) goto noinsert; 411cd0e1443SSatish Balay if (nrow >= rmax) { 412cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 413cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 414cd0e1443SSatish Balay Scalar *new_a; 415cd0e1443SSatish Balay 416cd0e1443SSatish Balay /* malloc new storage space */ 4177e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 418cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4197e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 420cd0e1443SSatish Balay new_i = new_j + new_nz; 421cd0e1443SSatish Balay 422cd0e1443SSatish Balay /* copy over old data into new slots */ 423cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4247e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 425a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 426a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 427a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 428cd0e1443SSatish Balay len*sizeof(int)); 429a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 430a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 431a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 432a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 433cd0e1443SSatish Balay /* free up old matrix storage */ 434cd0e1443SSatish Balay PetscFree(a->a); 435cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 436cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 437cd0e1443SSatish Balay a->singlemalloc = 1; 438cd0e1443SSatish Balay 439a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 440cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4417e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 44218e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 443cd0e1443SSatish Balay a->reallocs++; 444119a934aSSatish Balay a->nz++; 445cd0e1443SSatish Balay } 4467e67e3f9SSatish Balay N = nrow++ - 1; 447cd0e1443SSatish Balay /* shift up all the later entries in this row */ 448cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 449cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4507e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 451cd0e1443SSatish Balay } 4527e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 453cd0e1443SSatish Balay rp[i] = bcol; 4547e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 455cd0e1443SSatish Balay noinsert:; 456cd0e1443SSatish Balay low = i; 457cd0e1443SSatish Balay } 458cd0e1443SSatish Balay ailen[brow] = nrow; 459cd0e1443SSatish Balay } 460cd0e1443SSatish Balay return 0; 461cd0e1443SSatish Balay } 462cd0e1443SSatish Balay 4630b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 4640b824a48SSatish Balay { 4650b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4660b824a48SSatish Balay *m = a->m; *n = a->n; 4670b824a48SSatish Balay return 0; 4680b824a48SSatish Balay } 4690b824a48SSatish Balay 4700b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 4710b824a48SSatish Balay { 4720b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4730b824a48SSatish Balay *m = 0; *n = a->m; 4740b824a48SSatish Balay return 0; 4750b824a48SSatish Balay } 4760b824a48SSatish Balay 4779867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4789867e207SSatish Balay { 4799867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4807e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 4819867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 4829867e207SSatish Balay 4839867e207SSatish Balay bs = a->bs; 4849867e207SSatish Balay ai = a->i; 4859867e207SSatish Balay aj = a->j; 4869867e207SSatish Balay aa = a->a; 4879df24120SSatish Balay bs2 = a->bs2; 4889867e207SSatish Balay 4899867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 4909867e207SSatish Balay 4919867e207SSatish Balay bn = row/bs; /* Block number */ 4929867e207SSatish Balay bp = row % bs; /* Block Position */ 4939867e207SSatish Balay M = ai[bn+1] - ai[bn]; 4949867e207SSatish Balay *nz = bs*M; 4959867e207SSatish Balay 4969867e207SSatish Balay if (v) { 4979867e207SSatish Balay *v = 0; 4989867e207SSatish Balay if (*nz) { 4999867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 5009867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 5019867e207SSatish Balay v_i = *v + i*bs; 5027e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 5037e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 5049867e207SSatish Balay } 5059867e207SSatish Balay } 5069867e207SSatish Balay } 5079867e207SSatish Balay 5089867e207SSatish Balay if (idx) { 5099867e207SSatish Balay *idx = 0; 5109867e207SSatish Balay if (*nz) { 5119867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 5129867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 5139867e207SSatish Balay idx_i = *idx + i*bs; 5149867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 5159867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 5169867e207SSatish Balay } 5179867e207SSatish Balay } 5189867e207SSatish Balay } 5199867e207SSatish Balay return 0; 5209867e207SSatish Balay } 5219867e207SSatish Balay 5229867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 5239867e207SSatish Balay { 5249867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 5259867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 5269867e207SSatish Balay return 0; 5279867e207SSatish Balay } 528b6490206SBarry Smith 529f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 530f2501298SSatish Balay { 531f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 532f2501298SSatish Balay Mat C; 533f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 5349df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 535f2501298SSatish Balay Scalar *array=a->a; 536f2501298SSatish Balay 537f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 538f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 539f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 540f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 541a7c10996SSatish Balay 542a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 543f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 544f2501298SSatish Balay PetscFree(col); 545f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 546f2501298SSatish Balay cols = rows + bs; 547f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 548f2501298SSatish Balay cols[0] = i*bs; 549f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 550f2501298SSatish Balay len = ai[i+1] - ai[i]; 551f2501298SSatish Balay for ( j=0; j<len; j++ ) { 552f2501298SSatish Balay rows[0] = (*aj++)*bs; 553f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 554f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 555f2501298SSatish Balay array += bs2; 556f2501298SSatish Balay } 557f2501298SSatish Balay } 5581073c447SSatish Balay PetscFree(rows); 559f2501298SSatish Balay 5606d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5616d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562f2501298SSatish Balay 563f2501298SSatish Balay if (B != PETSC_NULL) { 564f2501298SSatish Balay *B = C; 565f2501298SSatish Balay } else { 566f2501298SSatish Balay /* This isn't really an in-place transpose */ 567f2501298SSatish Balay PetscFree(a->a); 568f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 569f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 570f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 571f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 572f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 573f2501298SSatish Balay PetscFree(a); 574f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 575f2501298SSatish Balay PetscHeaderDestroy(C); 576f2501298SSatish Balay } 577f2501298SSatish Balay return 0; 578f2501298SSatish Balay } 579f2501298SSatish Balay 580f2501298SSatish Balay 581584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 582584200bdSSatish Balay { 583584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 584584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 585a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 5869df24120SSatish Balay int mbs = a->mbs, bs2 = a->bs2; 587584200bdSSatish Balay Scalar *aa = a->a, *ap; 588584200bdSSatish Balay 5896d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 590584200bdSSatish Balay 591584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 592584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 593584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 594584200bdSSatish Balay if (fshift) { 595a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 596584200bdSSatish Balay N = ailen[i]; 597584200bdSSatish Balay for ( j=0; j<N; j++ ) { 598584200bdSSatish Balay ip[j-fshift] = ip[j]; 5997e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 600584200bdSSatish Balay } 601584200bdSSatish Balay } 602584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 603584200bdSSatish Balay } 604584200bdSSatish Balay if (mbs) { 605584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 606584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 607584200bdSSatish Balay } 608584200bdSSatish Balay /* reset ilen and imax for each row */ 609584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 610584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 611584200bdSSatish Balay } 612a7c10996SSatish Balay a->nz = ai[mbs]; 613584200bdSSatish Balay 614584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 615584200bdSSatish Balay if (fshift && a->diag) { 616584200bdSSatish Balay PetscFree(a->diag); 617584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 618584200bdSSatish Balay a->diag = 0; 619584200bdSSatish Balay } 620537820f0SBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n", 621537820f0SBarry Smith fshift*bs2,a->nz*bs2,m,a->bs); 622584200bdSSatish Balay PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 623584200bdSSatish Balay a->reallocs); 624*4e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 625*4e220ebcSLois Curfman McInnes 626584200bdSSatish Balay return 0; 627584200bdSSatish Balay } 628584200bdSSatish Balay 629b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 6302593348eSBarry Smith { 631b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6329df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 6332593348eSBarry Smith return 0; 6342593348eSBarry Smith } 6352593348eSBarry Smith 636b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 6372593348eSBarry Smith { 6382593348eSBarry Smith Mat A = (Mat) obj; 639b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6402593348eSBarry Smith 6412593348eSBarry Smith #if defined(PETSC_LOG) 6422593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 6432593348eSBarry Smith #endif 6442593348eSBarry Smith PetscFree(a->a); 6452593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 6462593348eSBarry Smith if (a->diag) PetscFree(a->diag); 6472593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 6482593348eSBarry Smith if (a->imax) PetscFree(a->imax); 6492593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 650de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 6512593348eSBarry Smith PetscFree(a); 652f2655603SLois Curfman McInnes PLogObjectDestroy(A); 653f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 6542593348eSBarry Smith return 0; 6552593348eSBarry Smith } 6562593348eSBarry Smith 657b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 6582593348eSBarry Smith { 659b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6606d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 6616d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 6626d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 6636d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 6646d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 6656d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 6666d4a8577SBarry Smith op == MAT_SYMMETRIC || 6676d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 6686d4a8577SBarry Smith op == MAT_YES_NEW_DIAGONALS) 66994a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 6706d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 6716d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 6722593348eSBarry Smith else 673b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 6742593348eSBarry Smith return 0; 6752593348eSBarry Smith } 6762593348eSBarry Smith 6772593348eSBarry Smith 6782593348eSBarry Smith /* -------------------------------------------------------*/ 6792593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 6802593348eSBarry Smith /* -------------------------------------------------------*/ 681b6490206SBarry Smith #include "pinclude/plapack.h" 682b6490206SBarry Smith 68339b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 6842593348eSBarry Smith { 685b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 68639b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 687c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 6882593348eSBarry Smith 689c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 690c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 691b6490206SBarry Smith 692119a934aSSatish Balay idx = a->j; 693119a934aSSatish Balay v = a->a; 694119a934aSSatish Balay ii = a->i; 695119a934aSSatish Balay 696119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 697119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 698119a934aSSatish Balay sum = 0.0; 699119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 7001a6a6d98SBarry Smith z[i] = sum; 701119a934aSSatish Balay } 702c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 703c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 70439b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 70539b95ed1SBarry Smith return 0; 70639b95ed1SBarry Smith } 70739b95ed1SBarry Smith 70839b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 70939b95ed1SBarry Smith { 71039b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 71139b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 71239b95ed1SBarry Smith register Scalar x1,x2; 71339b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 714c16cb8f2SBarry Smith int j,n; 71539b95ed1SBarry Smith 716c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 717c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 71839b95ed1SBarry Smith 71939b95ed1SBarry Smith idx = a->j; 72039b95ed1SBarry Smith v = a->a; 72139b95ed1SBarry Smith ii = a->i; 72239b95ed1SBarry Smith 723119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 724119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 725119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 726119a934aSSatish Balay for ( j=0; j<n; j++ ) { 727119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 728119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 729119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 730119a934aSSatish Balay v += 4; 731119a934aSSatish Balay } 7321a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 733119a934aSSatish Balay z += 2; 734119a934aSSatish Balay } 735c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 736c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 73739b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 73839b95ed1SBarry Smith return 0; 73939b95ed1SBarry Smith } 74039b95ed1SBarry Smith 74139b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 74239b95ed1SBarry Smith { 74339b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 74439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 745c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 74639b95ed1SBarry Smith 747c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 748c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 74939b95ed1SBarry Smith 75039b95ed1SBarry Smith idx = a->j; 75139b95ed1SBarry Smith v = a->a; 75239b95ed1SBarry Smith ii = a->i; 75339b95ed1SBarry Smith 754119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 755119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 756119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 757119a934aSSatish Balay for ( j=0; j<n; j++ ) { 758119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 759119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 760119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 761119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 762119a934aSSatish Balay v += 9; 763119a934aSSatish Balay } 7641a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 765119a934aSSatish Balay z += 3; 766119a934aSSatish Balay } 767c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 768c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 76939b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 77039b95ed1SBarry Smith return 0; 77139b95ed1SBarry Smith } 77239b95ed1SBarry Smith 77339b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 77439b95ed1SBarry Smith { 77539b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77639b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 77739b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 77839b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 779c16cb8f2SBarry Smith int j,n; 78039b95ed1SBarry Smith 781c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 782c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 78339b95ed1SBarry Smith 78439b95ed1SBarry Smith idx = a->j; 78539b95ed1SBarry Smith v = a->a; 78639b95ed1SBarry Smith ii = a->i; 78739b95ed1SBarry Smith 788119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 789119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 790119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 791119a934aSSatish Balay for ( j=0; j<n; j++ ) { 792119a934aSSatish Balay xb = x + 4*(*idx++); 793119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 794119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 795119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 796119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 797119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 798119a934aSSatish Balay v += 16; 799119a934aSSatish Balay } 8001a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 801119a934aSSatish Balay z += 4; 802119a934aSSatish Balay } 803c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 804c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 80539b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 80639b95ed1SBarry Smith return 0; 80739b95ed1SBarry Smith } 80839b95ed1SBarry Smith 80939b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 81039b95ed1SBarry Smith { 81139b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 81239b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 81339b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 814c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 81539b95ed1SBarry Smith 816c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 817c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 81839b95ed1SBarry Smith 81939b95ed1SBarry Smith idx = a->j; 82039b95ed1SBarry Smith v = a->a; 82139b95ed1SBarry Smith ii = a->i; 82239b95ed1SBarry Smith 823119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 824119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 825119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 826119a934aSSatish Balay for ( j=0; j<n; j++ ) { 827119a934aSSatish Balay xb = x + 5*(*idx++); 828119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 829119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 830119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 831119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 832119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 833119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 834119a934aSSatish Balay v += 25; 835119a934aSSatish Balay } 8361a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 837119a934aSSatish Balay z += 5; 838119a934aSSatish Balay } 839c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 840c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 84139b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 84239b95ed1SBarry Smith return 0; 84339b95ed1SBarry Smith } 84439b95ed1SBarry Smith 84539b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 84639b95ed1SBarry Smith { 84739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 84839b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 849c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 850c16cb8f2SBarry Smith int _One = 1,ncols,k; 851c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 85239b95ed1SBarry Smith 853c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 854c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 85539b95ed1SBarry Smith 85639b95ed1SBarry Smith idx = a->j; 85739b95ed1SBarry Smith v = a->a; 85839b95ed1SBarry Smith ii = a->i; 85939b95ed1SBarry Smith 86039b95ed1SBarry Smith 861119a934aSSatish Balay if (!a->mult_work) { 8623b547af2SSatish Balay k = PetscMax(a->m,a->n); 8632b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 864119a934aSSatish Balay } 865119a934aSSatish Balay work = a->mult_work; 866119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 867119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 868119a934aSSatish Balay ncols = n*bs; 869119a934aSSatish Balay workt = work; 870119a934aSSatish Balay for ( j=0; j<n; j++ ) { 871119a934aSSatish Balay xb = x + bs*(*idx++); 872119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 873119a934aSSatish Balay workt += bs; 874119a934aSSatish Balay } 8751a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 876119a934aSSatish Balay v += n*bs2; 877119a934aSSatish Balay z += bs; 878119a934aSSatish Balay } 879c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 880c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 8811a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 882bb42667fSSatish Balay return 0; 883bb42667fSSatish Balay } 884bb42667fSSatish Balay 885f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 886f44d3a6dSSatish Balay { 887f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 888f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 889c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 890f44d3a6dSSatish Balay 891c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 892c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 893c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 894f44d3a6dSSatish Balay 895f44d3a6dSSatish Balay idx = a->j; 896f44d3a6dSSatish Balay v = a->a; 897f44d3a6dSSatish Balay ii = a->i; 898f44d3a6dSSatish Balay 899f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 900f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 901f44d3a6dSSatish Balay sum = y[i]; 902f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 903f44d3a6dSSatish Balay z[i] = sum; 904f44d3a6dSSatish Balay } 905c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 906c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 907c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 908f44d3a6dSSatish Balay PLogFlops(2*a->nz); 909f44d3a6dSSatish Balay return 0; 910f44d3a6dSSatish Balay } 911f44d3a6dSSatish Balay 912f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 913f44d3a6dSSatish Balay { 914f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 915f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 916f44d3a6dSSatish Balay register Scalar x1,x2; 917f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 918c16cb8f2SBarry Smith int j,n; 919f44d3a6dSSatish Balay 920c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 921c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 922c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 923f44d3a6dSSatish Balay 924f44d3a6dSSatish Balay idx = a->j; 925f44d3a6dSSatish Balay v = a->a; 926f44d3a6dSSatish Balay ii = a->i; 927f44d3a6dSSatish Balay 928f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 929f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 930f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 931f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 932f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 933f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 934f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 935f44d3a6dSSatish Balay v += 4; 936f44d3a6dSSatish Balay } 937f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 938f44d3a6dSSatish Balay z += 2; y += 2; 939f44d3a6dSSatish Balay } 940c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 941c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 942c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 943f44d3a6dSSatish Balay PLogFlops(4*a->nz); 944f44d3a6dSSatish Balay return 0; 945f44d3a6dSSatish Balay } 946f44d3a6dSSatish Balay 947f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 948f44d3a6dSSatish Balay { 949f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 950f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 951c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 952f44d3a6dSSatish Balay 953c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 954c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 955c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 956f44d3a6dSSatish Balay 957f44d3a6dSSatish Balay idx = a->j; 958f44d3a6dSSatish Balay v = a->a; 959f44d3a6dSSatish Balay ii = a->i; 960f44d3a6dSSatish Balay 961f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 962f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 963f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 964f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 965f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 966f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 967f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 968f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 969f44d3a6dSSatish Balay v += 9; 970f44d3a6dSSatish Balay } 971f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 972f44d3a6dSSatish Balay z += 3; y += 3; 973f44d3a6dSSatish Balay } 974c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 975c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 976c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 977f44d3a6dSSatish Balay PLogFlops(18*a->nz); 978f44d3a6dSSatish Balay return 0; 979f44d3a6dSSatish Balay } 980f44d3a6dSSatish Balay 981f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 982f44d3a6dSSatish Balay { 983f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 984f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 985f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 986f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 987c16cb8f2SBarry Smith int j,n; 988f44d3a6dSSatish Balay 989c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 990c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 991c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 992f44d3a6dSSatish Balay 993f44d3a6dSSatish Balay idx = a->j; 994f44d3a6dSSatish Balay v = a->a; 995f44d3a6dSSatish Balay ii = a->i; 996f44d3a6dSSatish Balay 997f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 998f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 999f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1000f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1001f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1002f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1003f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1004f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1005f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1006f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1007f44d3a6dSSatish Balay v += 16; 1008f44d3a6dSSatish Balay } 1009f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1010f44d3a6dSSatish Balay z += 4; y += 4; 1011f44d3a6dSSatish Balay } 1012c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1013c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1014c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1015f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1016f44d3a6dSSatish Balay return 0; 1017f44d3a6dSSatish Balay } 1018f44d3a6dSSatish Balay 1019f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1020f44d3a6dSSatish Balay { 1021f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1022f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1023f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1024c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1025f44d3a6dSSatish Balay 1026c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1027c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1028c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1029f44d3a6dSSatish Balay 1030f44d3a6dSSatish Balay idx = a->j; 1031f44d3a6dSSatish Balay v = a->a; 1032f44d3a6dSSatish Balay ii = a->i; 1033f44d3a6dSSatish Balay 1034f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1035f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1036f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1037f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1038f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1039f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1040f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1041f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1042f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1043f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1044f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1045f44d3a6dSSatish Balay v += 25; 1046f44d3a6dSSatish Balay } 1047f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1048f44d3a6dSSatish Balay z += 5; y += 5; 1049f44d3a6dSSatish Balay } 1050c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1051c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1052c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1053f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1054f44d3a6dSSatish Balay return 0; 1055f44d3a6dSSatish Balay } 1056f44d3a6dSSatish Balay 1057f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1058f44d3a6dSSatish Balay { 1059f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1060f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1061f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1062f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1063f44d3a6dSSatish Balay 1064f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1065f44d3a6dSSatish Balay 1066c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1067c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1068f44d3a6dSSatish Balay 1069f44d3a6dSSatish Balay idx = a->j; 1070f44d3a6dSSatish Balay v = a->a; 1071f44d3a6dSSatish Balay ii = a->i; 1072f44d3a6dSSatish Balay 1073f44d3a6dSSatish Balay 1074f44d3a6dSSatish Balay if (!a->mult_work) { 1075f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1076f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1077f44d3a6dSSatish Balay } 1078f44d3a6dSSatish Balay work = a->mult_work; 1079f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1080f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1081f44d3a6dSSatish Balay ncols = n*bs; 1082f44d3a6dSSatish Balay workt = work; 1083f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1084f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1085f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1086f44d3a6dSSatish Balay workt += bs; 1087f44d3a6dSSatish Balay } 1088f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1089f44d3a6dSSatish Balay v += n*bs2; 1090f44d3a6dSSatish Balay z += bs; 1091f44d3a6dSSatish Balay } 1092c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1093c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1094f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1095f44d3a6dSSatish Balay return 0; 1096f44d3a6dSSatish Balay } 1097f44d3a6dSSatish Balay 10981a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1099bb42667fSSatish Balay { 1100bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 11011a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1102bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1103bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 11049df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1105119a934aSSatish Balay 1106119a934aSSatish Balay 1107bb42667fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1108bb42667fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1109bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1110bb42667fSSatish Balay 1111119a934aSSatish Balay idx = a->j; 1112119a934aSSatish Balay v = a->a; 1113119a934aSSatish Balay ii = a->i; 1114119a934aSSatish Balay 1115119a934aSSatish Balay switch (bs) { 1116119a934aSSatish Balay case 1: 1117119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1118119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1119119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1120119a934aSSatish Balay ib = idx + ai[i]; 1121119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1122bb1453f0SSatish Balay rval = ib[j]; 1123bb1453f0SSatish Balay z[rval] += *v++ * x1; 1124119a934aSSatish Balay } 1125119a934aSSatish Balay } 1126119a934aSSatish Balay break; 1127119a934aSSatish Balay case 2: 1128119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1129119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1130119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1131119a934aSSatish Balay ib = idx + ai[i]; 1132119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1133119a934aSSatish Balay rval = ib[j]*2; 1134bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1135bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1136119a934aSSatish Balay v += 4; 1137119a934aSSatish Balay } 1138119a934aSSatish Balay } 1139119a934aSSatish Balay break; 1140119a934aSSatish Balay case 3: 1141119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1142119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1143119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1144119a934aSSatish Balay ib = idx + ai[i]; 1145119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1146119a934aSSatish Balay rval = ib[j]*3; 1147bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1148bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1149bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1150119a934aSSatish Balay v += 9; 1151119a934aSSatish Balay } 1152119a934aSSatish Balay } 1153119a934aSSatish Balay break; 1154119a934aSSatish Balay case 4: 1155119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1156119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1157119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1158119a934aSSatish Balay ib = idx + ai[i]; 1159119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1160119a934aSSatish Balay rval = ib[j]*4; 1161bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1162bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1163bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1164bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1165119a934aSSatish Balay v += 16; 1166119a934aSSatish Balay } 1167119a934aSSatish Balay } 1168119a934aSSatish Balay break; 1169119a934aSSatish Balay case 5: 1170119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1171119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1172119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1173119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1174119a934aSSatish Balay ib = idx + ai[i]; 1175119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1176119a934aSSatish Balay rval = ib[j]*5; 1177bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1178bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1179bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1180bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1181bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1182119a934aSSatish Balay v += 25; 1183119a934aSSatish Balay } 1184119a934aSSatish Balay } 1185119a934aSSatish Balay break; 1186119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1187119a934aSSatish Balay default: { 1188119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1189119a934aSSatish Balay if (!a->mult_work) { 11903b547af2SSatish Balay k = PetscMax(a->m,a->n); 1191bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1192119a934aSSatish Balay CHKPTRQ(a->mult_work); 1193119a934aSSatish Balay } 1194119a934aSSatish Balay work = a->mult_work; 1195119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1196119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1197119a934aSSatish Balay ncols = n*bs; 1198119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1199119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1200119a934aSSatish Balay v += n*bs2; 1201119a934aSSatish Balay x += bs; 1202119a934aSSatish Balay workt = work; 1203119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1204119a934aSSatish Balay zb = z + bs*(*idx++); 1205bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1206119a934aSSatish Balay workt += bs; 1207119a934aSSatish Balay } 1208119a934aSSatish Balay } 1209119a934aSSatish Balay } 1210119a934aSSatish Balay } 12110419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 12120419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1213faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1214faf6580fSSatish Balay return 0; 1215faf6580fSSatish Balay } 1216faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1217faf6580fSSatish Balay { 1218faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1219faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1220faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1221faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1222faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1223faf6580fSSatish Balay 1224faf6580fSSatish Balay 1225faf6580fSSatish Balay 1226faf6580fSSatish Balay ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1227faf6580fSSatish Balay ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1228faf6580fSSatish Balay 1229648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1230648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1231648c1d95SSatish Balay 1232faf6580fSSatish Balay 1233faf6580fSSatish Balay idx = a->j; 1234faf6580fSSatish Balay v = a->a; 1235faf6580fSSatish Balay ii = a->i; 1236faf6580fSSatish Balay 1237faf6580fSSatish Balay switch (bs) { 1238faf6580fSSatish Balay case 1: 1239faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1240faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1241faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1242faf6580fSSatish Balay ib = idx + ai[i]; 1243faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1244faf6580fSSatish Balay rval = ib[j]; 1245faf6580fSSatish Balay z[rval] += *v++ * x1; 1246faf6580fSSatish Balay } 1247faf6580fSSatish Balay } 1248faf6580fSSatish Balay break; 1249faf6580fSSatish Balay case 2: 1250faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1251faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1252faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1253faf6580fSSatish Balay ib = idx + ai[i]; 1254faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1255faf6580fSSatish Balay rval = ib[j]*2; 1256faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1257faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1258faf6580fSSatish Balay v += 4; 1259faf6580fSSatish Balay } 1260faf6580fSSatish Balay } 1261faf6580fSSatish Balay break; 1262faf6580fSSatish Balay case 3: 1263faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1264faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1265faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1266faf6580fSSatish Balay ib = idx + ai[i]; 1267faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1268faf6580fSSatish Balay rval = ib[j]*3; 1269faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1270faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1271faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1272faf6580fSSatish Balay v += 9; 1273faf6580fSSatish Balay } 1274faf6580fSSatish Balay } 1275faf6580fSSatish Balay break; 1276faf6580fSSatish Balay case 4: 1277faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1278faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1279faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1280faf6580fSSatish Balay ib = idx + ai[i]; 1281faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1282faf6580fSSatish Balay rval = ib[j]*4; 1283faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1284faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1285faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1286faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1287faf6580fSSatish Balay v += 16; 1288faf6580fSSatish Balay } 1289faf6580fSSatish Balay } 1290faf6580fSSatish Balay break; 1291faf6580fSSatish Balay case 5: 1292faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1293faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1294faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1295faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1296faf6580fSSatish Balay ib = idx + ai[i]; 1297faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1298faf6580fSSatish Balay rval = ib[j]*5; 1299faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1300faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1301faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1302faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1303faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1304faf6580fSSatish Balay v += 25; 1305faf6580fSSatish Balay } 1306faf6580fSSatish Balay } 1307faf6580fSSatish Balay break; 1308faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1309faf6580fSSatish Balay default: { 1310faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1311faf6580fSSatish Balay if (!a->mult_work) { 1312faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1313faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1314faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1315faf6580fSSatish Balay } 1316faf6580fSSatish Balay work = a->mult_work; 1317faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1318faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1319faf6580fSSatish Balay ncols = n*bs; 1320faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1321faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1322faf6580fSSatish Balay v += n*bs2; 1323faf6580fSSatish Balay x += bs; 1324faf6580fSSatish Balay workt = work; 1325faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1326faf6580fSSatish Balay zb = z + bs*(*idx++); 1327faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1328faf6580fSSatish Balay workt += bs; 1329faf6580fSSatish Balay } 1330faf6580fSSatish Balay } 1331faf6580fSSatish Balay } 1332faf6580fSSatish Balay } 1333faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1334faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1335faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 13362593348eSBarry Smith return 0; 13372593348eSBarry Smith } 13382593348eSBarry Smith 1339*4e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 13402593348eSBarry Smith { 1341b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1342*4e220ebcSLois Curfman McInnes 1343*4e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 1344*4e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 1345*4e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 1346*4e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 1347*4e220ebcSLois Curfman McInnes info->block_size = a->bs2; 1348*4e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 1349*4e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 1350*4e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1351*4e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 1352*4e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1353*4e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 1354*4e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 1355*4e220ebcSLois Curfman McInnes info->memory = A->mem; 1356*4e220ebcSLois Curfman McInnes if (A->factor) { 1357*4e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 1358*4e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 1359*4e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 1360*4e220ebcSLois Curfman McInnes } else { 1361*4e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 1362*4e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 1363*4e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 1364*4e220ebcSLois Curfman McInnes } 13652593348eSBarry Smith return 0; 13662593348eSBarry Smith } 13672593348eSBarry Smith 136891d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 136991d316f6SSatish Balay { 137091d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 137191d316f6SSatish Balay 137291d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 137391d316f6SSatish Balay 137491d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 137591d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1376a7c10996SSatish Balay (a->nz != b->nz)) { 137791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 137891d316f6SSatish Balay } 137991d316f6SSatish Balay 138091d316f6SSatish Balay /* if the a->i are the same */ 138191d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 138291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 138391d316f6SSatish Balay } 138491d316f6SSatish Balay 138591d316f6SSatish Balay /* if a->j are the same */ 138691d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 138791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 138891d316f6SSatish Balay } 138991d316f6SSatish Balay 139091d316f6SSatish Balay /* if a->a are the same */ 139191d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 139291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 139391d316f6SSatish Balay } 139491d316f6SSatish Balay *flg = PETSC_TRUE; 139591d316f6SSatish Balay return 0; 139691d316f6SSatish Balay 139791d316f6SSatish Balay } 139891d316f6SSatish Balay 139991d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 140091d316f6SSatish Balay { 140191d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14027e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 140317e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 140417e48fc4SSatish Balay 140517e48fc4SSatish Balay bs = a->bs; 140617e48fc4SSatish Balay aa = a->a; 140717e48fc4SSatish Balay ai = a->i; 140817e48fc4SSatish Balay aj = a->j; 140917e48fc4SSatish Balay ambs = a->mbs; 14109df24120SSatish Balay bs2 = a->bs2; 141191d316f6SSatish Balay 141291d316f6SSatish Balay VecSet(&zero,v); 141391d316f6SSatish Balay VecGetArray(v,&x); VecGetLocalSize(v,&n); 14149867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 141517e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 141617e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 141717e48fc4SSatish Balay if (aj[j] == i) { 141817e48fc4SSatish Balay row = i*bs; 14197e67e3f9SSatish Balay aa_j = aa+j*bs2; 14207e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 142191d316f6SSatish Balay break; 142291d316f6SSatish Balay } 142391d316f6SSatish Balay } 142491d316f6SSatish Balay } 142591d316f6SSatish Balay return 0; 142691d316f6SSatish Balay } 142791d316f6SSatish Balay 14289867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 14299867e207SSatish Balay { 14309867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14319867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 14327e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 14339867e207SSatish Balay 14349867e207SSatish Balay ai = a->i; 14359867e207SSatish Balay aj = a->j; 14369867e207SSatish Balay aa = a->a; 14379867e207SSatish Balay m = a->m; 14389867e207SSatish Balay n = a->n; 14399867e207SSatish Balay bs = a->bs; 14409867e207SSatish Balay mbs = a->mbs; 14419df24120SSatish Balay bs2 = a->bs2; 14429867e207SSatish Balay if (ll) { 14439867e207SSatish Balay VecGetArray(ll,&l); VecGetSize(ll,&lm); 14449867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 14459867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 14469867e207SSatish Balay M = ai[i+1] - ai[i]; 14479867e207SSatish Balay li = l + i*bs; 14487e67e3f9SSatish Balay v = aa + bs2*ai[i]; 14499867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 14507e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 14519867e207SSatish Balay (*v++) *= li[k%bs]; 14529867e207SSatish Balay } 14539867e207SSatish Balay } 14549867e207SSatish Balay } 14559867e207SSatish Balay } 14569867e207SSatish Balay 14579867e207SSatish Balay if (rr) { 14589867e207SSatish Balay VecGetArray(rr,&r); VecGetSize(rr,&rn); 14599867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 14609867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 14619867e207SSatish Balay M = ai[i+1] - ai[i]; 14627e67e3f9SSatish Balay v = aa + bs2*ai[i]; 14639867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 14649867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 14659867e207SSatish Balay for ( k=0; k<bs; k++ ) { 14669867e207SSatish Balay x = ri[k]; 14679867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 14689867e207SSatish Balay } 14699867e207SSatish Balay } 14709867e207SSatish Balay } 14719867e207SSatish Balay } 14729867e207SSatish Balay return 0; 14739867e207SSatish Balay } 14749867e207SSatish Balay 14759867e207SSatish Balay 1476b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1477b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 14782a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1479736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1480736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 14811a6a6d98SBarry Smith 14821a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 14831a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 14841a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 14851a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 14861a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 14871a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 14881a6a6d98SBarry Smith 14897fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 14907fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 14917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 14927fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 14937fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 14947fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 14952593348eSBarry Smith 1496b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 14972593348eSBarry Smith { 1498b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14992593348eSBarry Smith Scalar *v = a->a; 15002593348eSBarry Smith double sum = 0.0; 15019df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 15022593348eSBarry Smith 15032593348eSBarry Smith if (type == NORM_FROBENIUS) { 15040419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 15052593348eSBarry Smith #if defined(PETSC_COMPLEX) 15062593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 15072593348eSBarry Smith #else 15082593348eSBarry Smith sum += (*v)*(*v); v++; 15092593348eSBarry Smith #endif 15102593348eSBarry Smith } 15112593348eSBarry Smith *norm = sqrt(sum); 15122593348eSBarry Smith } 15132593348eSBarry Smith else { 1514b6490206SBarry Smith SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 15152593348eSBarry Smith } 15162593348eSBarry Smith return 0; 15172593348eSBarry Smith } 15182593348eSBarry Smith 15192593348eSBarry Smith /* 15202593348eSBarry Smith note: This can only work for identity for row and col. It would 15212593348eSBarry Smith be good to check this and otherwise generate an error. 15222593348eSBarry Smith */ 1523b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 15242593348eSBarry Smith { 1525b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 15262593348eSBarry Smith Mat outA; 1527de6a44a3SBarry Smith int ierr; 15282593348eSBarry Smith 1529b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 15302593348eSBarry Smith 15312593348eSBarry Smith outA = inA; 15322593348eSBarry Smith inA->factor = FACTOR_LU; 15332593348eSBarry Smith a->row = row; 15342593348eSBarry Smith a->col = col; 15352593348eSBarry Smith 1536de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 15372593348eSBarry Smith 15382593348eSBarry Smith if (!a->diag) { 1539b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 15402593348eSBarry Smith } 15417fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 15422593348eSBarry Smith return 0; 15432593348eSBarry Smith } 15442593348eSBarry Smith 1545b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 15462593348eSBarry Smith { 1547b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 15489df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1549b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1550b6490206SBarry Smith PLogFlops(totalnz); 15512593348eSBarry Smith return 0; 15522593348eSBarry Smith } 15532593348eSBarry Smith 15547e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 15557e67e3f9SSatish Balay { 15567e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 15577e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1558a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 15599df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 15607e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 15617e67e3f9SSatish Balay 15627e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 15637e67e3f9SSatish Balay row = im[k]; brow = row/bs; 15647e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 15657e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1566a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 15677e67e3f9SSatish Balay nrow = ailen[brow]; 15687e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 15697e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 15707e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1571a7c10996SSatish Balay col = in[l] ; 15727e67e3f9SSatish Balay bcol = col/bs; 15737e67e3f9SSatish Balay cidx = col%bs; 15747e67e3f9SSatish Balay ridx = row%bs; 15757e67e3f9SSatish Balay high = nrow; 15767e67e3f9SSatish Balay low = 0; /* assume unsorted */ 15777e67e3f9SSatish Balay while (high-low > 5) { 15787e67e3f9SSatish Balay t = (low+high)/2; 15797e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 15807e67e3f9SSatish Balay else low = t; 15817e67e3f9SSatish Balay } 15827e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 15837e67e3f9SSatish Balay if (rp[i] > bcol) break; 15847e67e3f9SSatish Balay if (rp[i] == bcol) { 15857e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 15867e67e3f9SSatish Balay goto finished; 15877e67e3f9SSatish Balay } 15887e67e3f9SSatish Balay } 15897e67e3f9SSatish Balay *v++ = zero; 15907e67e3f9SSatish Balay finished:; 15917e67e3f9SSatish Balay } 15927e67e3f9SSatish Balay } 15937e67e3f9SSatish Balay return 0; 15947e67e3f9SSatish Balay } 15957e67e3f9SSatish Balay 15965a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 15975a838052SSatish Balay { 15985a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 15995a838052SSatish Balay *bs = baij->bs; 16005a838052SSatish Balay return 0; 16015a838052SSatish Balay } 16025a838052SSatish Balay 1603d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 1604d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1605d9b7c43dSSatish Balay { 1606d9b7c43dSSatish Balay int i,row; 1607d9b7c43dSSatish Balay row = idx[0]; 1608d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1609d9b7c43dSSatish Balay 1610d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1611d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1612d9b7c43dSSatish Balay } 1613d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1614d9b7c43dSSatish Balay return 0; 1615d9b7c43dSSatish Balay } 1616d9b7c43dSSatish Balay 1617d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1618d9b7c43dSSatish Balay { 1619d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1620d9b7c43dSSatish Balay IS is_local; 1621d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1622d9b7c43dSSatish Balay PetscTruth flg; 1623d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1624d9b7c43dSSatish Balay 1625d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1626d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1627d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1628537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1629d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1630d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1631d9b7c43dSSatish Balay 1632d9b7c43dSSatish Balay i = 0; 1633d9b7c43dSSatish Balay while (i < is_n) { 1634d9b7c43dSSatish Balay if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1635d9b7c43dSSatish Balay flg = PETSC_FALSE; 1636d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1637d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1638d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1639d9b7c43dSSatish Balay i += bs; 1640d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1641d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1642d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1643d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1644d9b7c43dSSatish Balay aa[0] = zero; 1645d9b7c43dSSatish Balay aa+=bs; 1646d9b7c43dSSatish Balay } 1647d9b7c43dSSatish Balay i++; 1648d9b7c43dSSatish Balay } 1649d9b7c43dSSatish Balay } 1650d9b7c43dSSatish Balay if (diag) { 1651d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1652d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1653d9b7c43dSSatish Balay } 1654d9b7c43dSSatish Balay } 1655d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1656d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1657d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 1658d9b7c43dSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1659d9b7c43dSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1660d9b7c43dSSatish Balay 1661d9b7c43dSSatish Balay return 0; 1662d9b7c43dSSatish Balay } 1663ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1664ba4ca20aSSatish Balay { 1665ba4ca20aSSatish Balay static int called = 0; 1666ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1667ba4ca20aSSatish Balay 1668ba4ca20aSSatish Balay if (called) return 0; else called = 1; 1669ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1670ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 1671ba4ca20aSSatish Balay return 0; 1672ba4ca20aSSatish Balay } 1673d9b7c43dSSatish Balay 16742593348eSBarry Smith /* -------------------------------------------------------------------*/ 1675cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 16769867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1677f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1678faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 16791a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 1680b6490206SBarry Smith 0,0, 1681de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1682b6490206SBarry Smith 0, 1683f2501298SSatish Balay MatTranspose_SeqBAIJ, 168417e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 16859867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1686584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1687b6490206SBarry Smith 0, 1688d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1689b6490206SBarry Smith MatGetReordering_SeqBAIJ, 16907fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1691b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1692de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1693b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 1694b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1695b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1696af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 16977e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 1698ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 16995a838052SSatish Balay 0,0,0,MatGetBlockSize_SeqBAIJ}; 17002593348eSBarry Smith 17012593348eSBarry Smith /*@C 170244cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 170344cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 170444cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 17052bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 17062bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 17072593348eSBarry Smith 17082593348eSBarry Smith Input Parameters: 17092593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1710b6490206SBarry Smith . bs - size of block 17112593348eSBarry Smith . m - number of rows 17122593348eSBarry Smith . n - number of columns 1713b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 17142bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 17152bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 17162593348eSBarry Smith 17172593348eSBarry Smith Output Parameter: 17182593348eSBarry Smith . A - the matrix 17192593348eSBarry Smith 17202593348eSBarry Smith Notes: 172144cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 17222593348eSBarry Smith storage. That is, the stored row and column indices can begin at 172344cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 17242593348eSBarry Smith 17252593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 17262593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 17272593348eSBarry Smith allocation. For additional details, see the users manual chapter on 17282593348eSBarry Smith matrices and the file $(PETSC_DIR)/Performance. 17292593348eSBarry Smith 173044cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 17312593348eSBarry Smith @*/ 1732b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 17332593348eSBarry Smith { 17342593348eSBarry Smith Mat B; 1735b6490206SBarry Smith Mat_SeqBAIJ *b; 1736f2501298SSatish Balay int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1737b6490206SBarry Smith 1738f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1739f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 17402593348eSBarry Smith 17412593348eSBarry Smith *A = 0; 1742b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 17432593348eSBarry Smith PLogObjectCreate(B); 1744b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 174544cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 17462593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 17471a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 17481a6a6d98SBarry Smith if (!flg) { 17497fc0212eSBarry Smith switch (bs) { 17507fc0212eSBarry Smith case 1: 17517fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17521a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 175339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 1754f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 17557fc0212eSBarry Smith break; 17564eeb42bcSBarry Smith case 2: 17574eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 17581a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 175939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 1760f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 17616d84be18SBarry Smith break; 1762f327dd97SBarry Smith case 3: 1763f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 17641a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 176539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 1766f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 17674eeb42bcSBarry Smith break; 17686d84be18SBarry Smith case 4: 17696d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 17701a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 177139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 1772f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 17736d84be18SBarry Smith break; 17746d84be18SBarry Smith case 5: 17756d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 17761a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 177739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 1778f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 17796d84be18SBarry Smith break; 17807fc0212eSBarry Smith } 17811a6a6d98SBarry Smith } 1782b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1783b6490206SBarry Smith B->view = MatView_SeqBAIJ; 17842593348eSBarry Smith B->factor = 0; 17852593348eSBarry Smith B->lupivotthreshold = 1.0; 17862593348eSBarry Smith b->row = 0; 17872593348eSBarry Smith b->col = 0; 17882593348eSBarry Smith b->reallocs = 0; 17892593348eSBarry Smith 179044cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 179144cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1792b6490206SBarry Smith b->mbs = mbs; 1793f2501298SSatish Balay b->nbs = nbs; 1794b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 17952593348eSBarry Smith if (nnz == PETSC_NULL) { 1796119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 17972593348eSBarry Smith else if (nz <= 0) nz = 1; 1798b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1799b6490206SBarry Smith nz = nz*mbs; 18002593348eSBarry Smith } 18012593348eSBarry Smith else { 18022593348eSBarry Smith nz = 0; 1803b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 18042593348eSBarry Smith } 18052593348eSBarry Smith 18062593348eSBarry Smith /* allocate the matrix space */ 18077e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 18082593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 18097e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 18107e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 18112593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 18122593348eSBarry Smith b->i = b->j + nz; 18132593348eSBarry Smith b->singlemalloc = 1; 18142593348eSBarry Smith 1815de6a44a3SBarry Smith b->i[0] = 0; 1816b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 18172593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 18182593348eSBarry Smith } 18192593348eSBarry Smith 1820b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1821b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1822b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1823b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 18242593348eSBarry Smith 1825b6490206SBarry Smith b->bs = bs; 18269df24120SSatish Balay b->bs2 = bs2; 1827b6490206SBarry Smith b->mbs = mbs; 18282593348eSBarry Smith b->nz = 0; 182918e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 18302593348eSBarry Smith b->sorted = 0; 18312593348eSBarry Smith b->roworiented = 1; 18322593348eSBarry Smith b->nonew = 0; 18332593348eSBarry Smith b->diag = 0; 18342593348eSBarry Smith b->solve_work = 0; 1835de6a44a3SBarry Smith b->mult_work = 0; 18362593348eSBarry Smith b->spptr = 0; 1837*4e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 1838*4e220ebcSLois Curfman McInnes 18392593348eSBarry Smith *A = B; 18402593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 18412593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 18422593348eSBarry Smith return 0; 18432593348eSBarry Smith } 18442593348eSBarry Smith 1845b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 18462593348eSBarry Smith { 18472593348eSBarry Smith Mat C; 1848b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 18499df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1850de6a44a3SBarry Smith 1851de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 18522593348eSBarry Smith 18532593348eSBarry Smith *B = 0; 1854b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 18552593348eSBarry Smith PLogObjectCreate(C); 1856b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 18572593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1858b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1859b6490206SBarry Smith C->view = MatView_SeqBAIJ; 18602593348eSBarry Smith C->factor = A->factor; 18612593348eSBarry Smith c->row = 0; 18622593348eSBarry Smith c->col = 0; 18632593348eSBarry Smith C->assembled = PETSC_TRUE; 18642593348eSBarry Smith 186544cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 186644cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 186744cd7ae7SLois Curfman McInnes C->M = a->m; 186844cd7ae7SLois Curfman McInnes C->N = a->n; 186944cd7ae7SLois Curfman McInnes 1870b6490206SBarry Smith c->bs = a->bs; 18719df24120SSatish Balay c->bs2 = a->bs2; 1872b6490206SBarry Smith c->mbs = a->mbs; 1873b6490206SBarry Smith c->nbs = a->nbs; 18742593348eSBarry Smith 1875b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1876b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1877b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 18782593348eSBarry Smith c->imax[i] = a->imax[i]; 18792593348eSBarry Smith c->ilen[i] = a->ilen[i]; 18802593348eSBarry Smith } 18812593348eSBarry Smith 18822593348eSBarry Smith /* allocate the matrix space */ 18832593348eSBarry Smith c->singlemalloc = 1; 18847e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 18852593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 18867e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1887de6a44a3SBarry Smith c->i = c->j + nz; 1888b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1889b6490206SBarry Smith if (mbs > 0) { 1890de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 18912593348eSBarry Smith if (cpvalues == COPY_VALUES) { 18927e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 18932593348eSBarry Smith } 18942593348eSBarry Smith } 18952593348eSBarry Smith 1896b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 18972593348eSBarry Smith c->sorted = a->sorted; 18982593348eSBarry Smith c->roworiented = a->roworiented; 18992593348eSBarry Smith c->nonew = a->nonew; 19002593348eSBarry Smith 19012593348eSBarry Smith if (a->diag) { 1902b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1903b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1904b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 19052593348eSBarry Smith c->diag[i] = a->diag[i]; 19062593348eSBarry Smith } 19072593348eSBarry Smith } 19082593348eSBarry Smith else c->diag = 0; 19092593348eSBarry Smith c->nz = a->nz; 19102593348eSBarry Smith c->maxnz = a->maxnz; 19112593348eSBarry Smith c->solve_work = 0; 19122593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 19137fc0212eSBarry Smith c->mult_work = 0; 19142593348eSBarry Smith *B = C; 19152593348eSBarry Smith return 0; 19162593348eSBarry Smith } 19172593348eSBarry Smith 191819bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 19192593348eSBarry Smith { 1920b6490206SBarry Smith Mat_SeqBAIJ *a; 19212593348eSBarry Smith Mat B; 1922de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1923b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 192435aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1925a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 1926b6490206SBarry Smith Scalar *aa; 192719bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 19282593348eSBarry Smith 19291a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1930de6a44a3SBarry Smith bs2 = bs*bs; 1931b6490206SBarry Smith 19322593348eSBarry Smith MPI_Comm_size(comm,&size); 1933b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 193490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 193577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1936de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 19372593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 19382593348eSBarry Smith 1939b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 194035aab85fSBarry Smith 194135aab85fSBarry Smith /* 194235aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 194335aab85fSBarry Smith divisible by the blocksize 194435aab85fSBarry Smith */ 1945b6490206SBarry Smith mbs = M/bs; 194635aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 194735aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 194835aab85fSBarry Smith else mbs++; 194935aab85fSBarry Smith if (extra_rows) { 1950537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 195135aab85fSBarry Smith } 1952b6490206SBarry Smith 19532593348eSBarry Smith /* read in row lengths */ 195435aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 195577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 195635aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 19572593348eSBarry Smith 1958b6490206SBarry Smith /* read in column indices */ 195935aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 196077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 196135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1962b6490206SBarry Smith 1963b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1964b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1965b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 196635aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 196735aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 196835aab85fSBarry Smith masked = mask + mbs; 1969b6490206SBarry Smith rowcount = 0; nzcount = 0; 1970b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 197135aab85fSBarry Smith nmask = 0; 1972b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1973b6490206SBarry Smith kmax = rowlengths[rowcount]; 1974b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 197535aab85fSBarry Smith tmp = jj[nzcount++]/bs; 197635aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1977b6490206SBarry Smith } 1978b6490206SBarry Smith rowcount++; 1979b6490206SBarry Smith } 198035aab85fSBarry Smith browlengths[i] += nmask; 198135aab85fSBarry Smith /* zero out the mask elements we set */ 198235aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1983b6490206SBarry Smith } 1984b6490206SBarry Smith 19852593348eSBarry Smith /* create our matrix */ 198635aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 198735aab85fSBarry Smith CHKERRQ(ierr); 19882593348eSBarry Smith B = *A; 1989b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 19902593348eSBarry Smith 19912593348eSBarry Smith /* set matrix "i" values */ 1992de6a44a3SBarry Smith a->i[0] = 0; 1993b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1994b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1995b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 19962593348eSBarry Smith } 1997b6490206SBarry Smith a->nz = 0; 1998b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 19992593348eSBarry Smith 2000b6490206SBarry Smith /* read in nonzero values */ 200135aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 200277c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 200335aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2004b6490206SBarry Smith 2005b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2006b6490206SBarry Smith nzcount = 0; jcount = 0; 2007b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2008b6490206SBarry Smith nzcountb = nzcount; 200935aab85fSBarry Smith nmask = 0; 2010b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2011b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2012b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 201335aab85fSBarry Smith tmp = jj[nzcount++]/bs; 201435aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2015b6490206SBarry Smith } 2016b6490206SBarry Smith rowcount++; 2017b6490206SBarry Smith } 2018de6a44a3SBarry Smith /* sort the masked values */ 201977c4ece6SBarry Smith PetscSortInt(nmask,masked); 2020de6a44a3SBarry Smith 2021b6490206SBarry Smith /* set "j" values into matrix */ 2022b6490206SBarry Smith maskcount = 1; 202335aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 202435aab85fSBarry Smith a->j[jcount++] = masked[j]; 2025de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2026b6490206SBarry Smith } 2027b6490206SBarry Smith /* set "a" values into matrix */ 2028de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2029b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2030b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2031b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2032de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2033de6a44a3SBarry Smith block = mask[tmp] - 1; 2034de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2035de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2036b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2037b6490206SBarry Smith } 2038b6490206SBarry Smith } 203935aab85fSBarry Smith /* zero out the mask elements we set */ 204035aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2041b6490206SBarry Smith } 204235aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2043b6490206SBarry Smith 2044b6490206SBarry Smith PetscFree(rowlengths); 2045b6490206SBarry Smith PetscFree(browlengths); 2046b6490206SBarry Smith PetscFree(aa); 2047b6490206SBarry Smith PetscFree(jj); 2048b6490206SBarry Smith PetscFree(mask); 2049b6490206SBarry Smith 2050b6490206SBarry Smith B->assembled = PETSC_TRUE; 2051de6a44a3SBarry Smith 2052de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2053de6a44a3SBarry Smith if (flg) { 205419bcc07fSBarry Smith Viewer tviewer; 205519bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 205690ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 205719bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 205819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2059de6a44a3SBarry Smith } 2060de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2061de6a44a3SBarry Smith if (flg) { 206219bcc07fSBarry Smith Viewer tviewer; 206319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 206490ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 206519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 206619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2067de6a44a3SBarry Smith } 2068de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2069de6a44a3SBarry Smith if (flg) { 207019bcc07fSBarry Smith Viewer tviewer; 207119bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 207219bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 207319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2074de6a44a3SBarry Smith } 2075de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2076de6a44a3SBarry Smith if (flg) { 207719bcc07fSBarry Smith Viewer tviewer; 207819bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 207990ace30eSBarry Smith ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 208019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 208119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2082de6a44a3SBarry Smith } 2083de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2084de6a44a3SBarry Smith if (flg) { 208519bcc07fSBarry Smith Viewer tviewer; 208619bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 208719bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 208819bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 208919bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2090de6a44a3SBarry Smith } 20912593348eSBarry Smith return 0; 20922593348eSBarry Smith } 20932593348eSBarry Smith 20942593348eSBarry Smith 20952593348eSBarry Smith 2096