12593348eSBarry Smith #ifndef lint 2*d402145bSBarry Smith static char vcid[] = "$Id: baij.c,v 1.83 1997/01/22 18:43:37 bsmith Exp bsmith $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 101a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 111a6a6d98SBarry Smith #include "src/inline/spops.h" 122593348eSBarry Smith #include "petsc.h" 133b547af2SSatish Balay 142593348eSBarry Smith 15de6a44a3SBarry Smith /* 16de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 17de6a44a3SBarry Smith */ 18de6a44a3SBarry Smith 195615d1e5SSatish Balay #undef __FUNC__ 205615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 22de6a44a3SBarry Smith { 23de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 247fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 25de6a44a3SBarry Smith 26de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 287fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 29de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30de6a44a3SBarry Smith if (a->j[j] == i) { 31de6a44a3SBarry Smith diag[i] = j; 32de6a44a3SBarry Smith break; 33de6a44a3SBarry Smith } 34de6a44a3SBarry Smith } 35de6a44a3SBarry Smith } 36de6a44a3SBarry Smith a->diag = diag; 37de6a44a3SBarry Smith return 0; 38de6a44a3SBarry Smith } 392593348eSBarry Smith 402593348eSBarry Smith #include "draw.h" 412593348eSBarry Smith #include "pinclude/pviewer.h" 4277c4ece6SBarry Smith #include "sys.h" 432593348eSBarry Smith 443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 453b2fbd54SBarry Smith 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 503b2fbd54SBarry Smith { 513b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 523b2fbd54SBarry Smith int ierr, n = a->mbs,i; 533b2fbd54SBarry Smith 543b2fbd54SBarry Smith *nn = n; 553b2fbd54SBarry Smith if (!ia) return 0; 563b2fbd54SBarry Smith if (symmetric) { 573b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 583b2fbd54SBarry Smith } else if (oshift == 1) { 593b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 603b2fbd54SBarry Smith int nz = a->i[n] + 1; 613b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 623b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 633b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 643b2fbd54SBarry Smith } else { 653b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 663b2fbd54SBarry Smith } 673b2fbd54SBarry Smith 683b2fbd54SBarry Smith return 0; 693b2fbd54SBarry Smith } 703b2fbd54SBarry Smith 715615d1e5SSatish Balay #undef __FUNC__ 725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 743b2fbd54SBarry Smith PetscTruth *done) 753b2fbd54SBarry Smith { 763b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773b2fbd54SBarry Smith int i,n = a->mbs; 783b2fbd54SBarry Smith 793b2fbd54SBarry Smith if (!ia) return 0; 803b2fbd54SBarry Smith if (symmetric) { 813b2fbd54SBarry Smith PetscFree(*ia); 823b2fbd54SBarry Smith PetscFree(*ja); 83af5da2bfSBarry Smith } else if (oshift == 1) { 843b2fbd54SBarry Smith int nz = a->i[n]; 853b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 863b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 873b2fbd54SBarry Smith } 883b2fbd54SBarry Smith return 0; 893b2fbd54SBarry Smith } 903b2fbd54SBarry Smith 913b2fbd54SBarry Smith 925615d1e5SSatish Balay #undef __FUNC__ 935615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Binary" 94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 952593348eSBarry Smith { 96b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 979df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98b6490206SBarry Smith Scalar *aa; 992593348eSBarry Smith 10090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1012593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1022593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1033b2fbd54SBarry Smith 1042593348eSBarry Smith col_lens[1] = a->m; 1052593348eSBarry Smith col_lens[2] = a->n; 1067e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1072593348eSBarry Smith 1082593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 109b6490206SBarry Smith count = 0; 110b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 111b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 112b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113b6490206SBarry Smith } 1142593348eSBarry Smith } 11577c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1162593348eSBarry Smith PetscFree(col_lens); 1172593348eSBarry Smith 1182593348eSBarry Smith /* store column indices (zero start index) */ 1197e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120b6490206SBarry Smith count = 0; 121b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 122b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 123b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 125b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1262593348eSBarry Smith } 1272593348eSBarry Smith } 128b6490206SBarry Smith } 129b6490206SBarry Smith } 1307e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131b6490206SBarry Smith PetscFree(jj); 1322593348eSBarry Smith 1332593348eSBarry Smith /* store nonzero values */ 1347e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135b6490206SBarry Smith count = 0; 136b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 137b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 138b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1407e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 141b6490206SBarry Smith } 142b6490206SBarry Smith } 143b6490206SBarry Smith } 144b6490206SBarry Smith } 1457e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146b6490206SBarry Smith PetscFree(aa); 1472593348eSBarry Smith return 0; 1482593348eSBarry Smith } 1492593348eSBarry Smith 1505615d1e5SSatish Balay #undef __FUNC__ 1515615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1532593348eSBarry Smith { 154b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1559df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1562593348eSBarry Smith FILE *fd; 1572593348eSBarry Smith char *outputname; 1582593348eSBarry Smith 15990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1602593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 162639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1637ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1642593348eSBarry Smith } 165639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1672593348eSBarry Smith } 168639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 16944cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17044cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17144cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17244cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17344cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17544cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17744cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 17844cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 17944cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18044cd7ae7SLois Curfman McInnes #else 18144cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18344cd7ae7SLois Curfman McInnes #endif 18444cd7ae7SLois Curfman McInnes } 18544cd7ae7SLois Curfman McInnes } 18644cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes } 1902593348eSBarry Smith else { 191b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 192b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 193b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 194b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1977e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 19888685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 1997e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20088685aaeSLois Curfman McInnes } 20188685aaeSLois Curfman McInnes else { 2027e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 20488685aaeSLois Curfman McInnes #else 2057e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20688685aaeSLois Curfman McInnes #endif 2072593348eSBarry Smith } 2082593348eSBarry Smith } 2092593348eSBarry Smith fprintf(fd,"\n"); 2102593348eSBarry Smith } 2112593348eSBarry Smith } 212b6490206SBarry Smith } 2132593348eSBarry Smith fflush(fd); 2142593348eSBarry Smith return 0; 2152593348eSBarry Smith } 2162593348eSBarry Smith 2175615d1e5SSatish Balay #undef __FUNC__ 2185615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ_Draw" 2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2203270192aSSatish Balay { 2213270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2223270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2233270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2243270192aSSatish Balay Scalar *aa; 2253270192aSSatish Balay Draw draw; 2263270192aSSatish Balay DrawButton button; 2273270192aSSatish Balay PetscTruth isnull; 2283270192aSSatish Balay 2293270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2303270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2313270192aSSatish Balay 2323270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2333270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2343270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2353270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2363270192aSSatish Balay color = DRAW_BLUE; 2373270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2383270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2393270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2403270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2413270192aSSatish Balay aa = a->a + j*bs2; 2423270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2433270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2443270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 2453270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2463270192aSSatish Balay } 2473270192aSSatish Balay } 2483270192aSSatish Balay } 2493270192aSSatish Balay } 2503270192aSSatish Balay color = DRAW_CYAN; 2513270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2523270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2533270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2543270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2553270192aSSatish Balay aa = a->a + j*bs2; 2563270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2573270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2583270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 2593270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2603270192aSSatish Balay } 2613270192aSSatish Balay } 2623270192aSSatish Balay } 2633270192aSSatish Balay } 2643270192aSSatish Balay 2653270192aSSatish Balay color = DRAW_RED; 2663270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2673270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2683270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2693270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2703270192aSSatish Balay aa = a->a + j*bs2; 2713270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2723270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2733270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 2743270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2753270192aSSatish Balay } 2763270192aSSatish Balay } 2773270192aSSatish Balay } 2783270192aSSatish Balay } 2793270192aSSatish Balay 2803270192aSSatish Balay DrawFlush(draw); 2813270192aSSatish Balay DrawGetPause(draw,&pause); 2823270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2833270192aSSatish Balay 2843270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2853270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2863270192aSSatish Balay while (button != BUTTON_RIGHT) { 2873270192aSSatish Balay DrawClear(draw); 2883270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2893270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2903270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2913270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2923270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 2933270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 2943270192aSSatish Balay w *= scale; h *= scale; 2953270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2963270192aSSatish Balay color = DRAW_BLUE; 2973270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2983270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2993270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3003270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3013270192aSSatish Balay aa = a->a + j*bs2; 3023270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3033270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3043270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 3053270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3063270192aSSatish Balay } 3073270192aSSatish Balay } 3083270192aSSatish Balay } 3093270192aSSatish Balay } 3103270192aSSatish Balay color = DRAW_CYAN; 3113270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3123270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3133270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3143270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3153270192aSSatish Balay aa = a->a + j*bs2; 3163270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3173270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3183270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 3193270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3203270192aSSatish Balay } 3213270192aSSatish Balay } 3223270192aSSatish Balay } 3233270192aSSatish Balay } 3243270192aSSatish Balay 3253270192aSSatish Balay color = DRAW_RED; 3263270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3273270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3283270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3293270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3303270192aSSatish Balay aa = a->a + j*bs2; 3313270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3323270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3333270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 3343270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3353270192aSSatish Balay } 3363270192aSSatish Balay } 3373270192aSSatish Balay } 3383270192aSSatish Balay } 3393270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3403270192aSSatish Balay } 3413270192aSSatish Balay return 0; 3423270192aSSatish Balay } 3433270192aSSatish Balay 3445615d1e5SSatish Balay #undef __FUNC__ 3455615d1e5SSatish Balay #define __FUNC__ "MatView_SeqBAIJ" 346b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3472593348eSBarry Smith { 3482593348eSBarry Smith Mat A = (Mat) obj; 34919bcc07fSBarry Smith ViewerType vtype; 35019bcc07fSBarry Smith int ierr; 3512593348eSBarry Smith 35219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 35319bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 354e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 3552593348eSBarry Smith } 35619bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3582593348eSBarry Smith } 35919bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 360b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3612593348eSBarry Smith } 36219bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3633270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3642593348eSBarry Smith } 3652593348eSBarry Smith return 0; 3662593348eSBarry Smith } 367b6490206SBarry Smith 368119a934aSSatish Balay #define CHUNKSIZE 10 369cd0e1443SSatish Balay 3705615d1e5SSatish Balay #undef __FUNC__ 3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373cd0e1443SSatish Balay { 374cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3789df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 379cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 380cd0e1443SSatish Balay 381cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 382cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 384e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 385e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3863b2fbd54SBarry Smith #endif 3872c3acbe9SBarry Smith rp = aj + ai[brow]; 3882c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 3892c3acbe9SBarry Smith rmax = imax[brow]; 3902c3acbe9SBarry Smith nrow = ailen[brow]; 391cd0e1443SSatish Balay low = 0; 392cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 394e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 3963b2fbd54SBarry Smith #endif 397a7c10996SSatish Balay col = in[l]; bcol = col/bs; 398cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 399cd0e1443SSatish Balay if (roworiented) { 400cd0e1443SSatish Balay value = *v++; 401cd0e1443SSatish Balay } 402cd0e1443SSatish Balay else { 403cd0e1443SSatish Balay value = v[k + l*m]; 404cd0e1443SSatish Balay } 405cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4062c3acbe9SBarry Smith while (high-low > 7) { 407cd0e1443SSatish Balay t = (low+high)/2; 408cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 409cd0e1443SSatish Balay else low = t; 410cd0e1443SSatish Balay } 411cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 412cd0e1443SSatish Balay if (rp[i] > bcol) break; 413cd0e1443SSatish Balay if (rp[i] == bcol) { 4147e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 415cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 416cd0e1443SSatish Balay else *bap = value; 417cd0e1443SSatish Balay goto noinsert; 418cd0e1443SSatish Balay } 419cd0e1443SSatish Balay } 420cd0e1443SSatish Balay if (nonew) goto noinsert; 421cd0e1443SSatish Balay if (nrow >= rmax) { 422cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 423cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424cd0e1443SSatish Balay Scalar *new_a; 425cd0e1443SSatish Balay 426cd0e1443SSatish Balay /* malloc new storage space */ 4277e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4297e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 430cd0e1443SSatish Balay new_i = new_j + new_nz; 431cd0e1443SSatish Balay 432cd0e1443SSatish Balay /* copy over old data into new slots */ 433cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4347e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438cd0e1443SSatish Balay len*sizeof(int)); 439a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443cd0e1443SSatish Balay /* free up old matrix storage */ 444cd0e1443SSatish Balay PetscFree(a->a); 445cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447cd0e1443SSatish Balay a->singlemalloc = 1; 448cd0e1443SSatish Balay 449a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4517e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 45218e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 453cd0e1443SSatish Balay a->reallocs++; 454119a934aSSatish Balay a->nz++; 455cd0e1443SSatish Balay } 4567e67e3f9SSatish Balay N = nrow++ - 1; 457cd0e1443SSatish Balay /* shift up all the later entries in this row */ 458cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 459cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4607e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461cd0e1443SSatish Balay } 4627e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463cd0e1443SSatish Balay rp[i] = bcol; 4647e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 465cd0e1443SSatish Balay noinsert:; 466cd0e1443SSatish Balay low = i; 467cd0e1443SSatish Balay } 468cd0e1443SSatish Balay ailen[brow] = nrow; 469cd0e1443SSatish Balay } 470cd0e1443SSatish Balay return 0; 471cd0e1443SSatish Balay } 472cd0e1443SSatish Balay 4735615d1e5SSatish Balay #undef __FUNC__ 4745615d1e5SSatish Balay #define __FUNC__ "MatGetSize_SeqBAIJ" 4750b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 4760b824a48SSatish Balay { 4770b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4780b824a48SSatish Balay *m = a->m; *n = a->n; 4790b824a48SSatish Balay return 0; 4800b824a48SSatish Balay } 4810b824a48SSatish Balay 4825615d1e5SSatish Balay #undef __FUNC__ 4835615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 4840b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 4850b824a48SSatish Balay { 4860b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4870b824a48SSatish Balay *m = 0; *n = a->m; 4880b824a48SSatish Balay return 0; 4890b824a48SSatish Balay } 4900b824a48SSatish Balay 4915615d1e5SSatish Balay #undef __FUNC__ 4925615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 4939867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4949867e207SSatish Balay { 4959867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4967e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 4979867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 4989867e207SSatish Balay 4999867e207SSatish Balay bs = a->bs; 5009867e207SSatish Balay ai = a->i; 5019867e207SSatish Balay aj = a->j; 5029867e207SSatish Balay aa = a->a; 5039df24120SSatish Balay bs2 = a->bs2; 5049867e207SSatish Balay 505e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 5069867e207SSatish Balay 5079867e207SSatish Balay bn = row/bs; /* Block number */ 5089867e207SSatish Balay bp = row % bs; /* Block Position */ 5099867e207SSatish Balay M = ai[bn+1] - ai[bn]; 5109867e207SSatish Balay *nz = bs*M; 5119867e207SSatish Balay 5129867e207SSatish Balay if (v) { 5139867e207SSatish Balay *v = 0; 5149867e207SSatish Balay if (*nz) { 5159867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 5169867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 5179867e207SSatish Balay v_i = *v + i*bs; 5187e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 5197e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 5209867e207SSatish Balay } 5219867e207SSatish Balay } 5229867e207SSatish Balay } 5239867e207SSatish Balay 5249867e207SSatish Balay if (idx) { 5259867e207SSatish Balay *idx = 0; 5269867e207SSatish Balay if (*nz) { 5279867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 5289867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 5299867e207SSatish Balay idx_i = *idx + i*bs; 5309867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 5319867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 5329867e207SSatish Balay } 5339867e207SSatish Balay } 5349867e207SSatish Balay } 5359867e207SSatish Balay return 0; 5369867e207SSatish Balay } 5379867e207SSatish Balay 5385615d1e5SSatish Balay #undef __FUNC__ 5395615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_SeqBAIJ" 5409867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 5419867e207SSatish Balay { 5429867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 5439867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 5449867e207SSatish Balay return 0; 5459867e207SSatish Balay } 546b6490206SBarry Smith 5475615d1e5SSatish Balay #undef __FUNC__ 5485615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 549f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 550f2501298SSatish Balay { 551f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 552f2501298SSatish Balay Mat C; 553f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 5549df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 555f2501298SSatish Balay Scalar *array=a->a; 556f2501298SSatish Balay 557f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 558e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 559f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 560f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 561a7c10996SSatish Balay 562a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 563f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 564f2501298SSatish Balay PetscFree(col); 565f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 566f2501298SSatish Balay cols = rows + bs; 567f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 568f2501298SSatish Balay cols[0] = i*bs; 569f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 570f2501298SSatish Balay len = ai[i+1] - ai[i]; 571f2501298SSatish Balay for ( j=0; j<len; j++ ) { 572f2501298SSatish Balay rows[0] = (*aj++)*bs; 573f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 574f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 575f2501298SSatish Balay array += bs2; 576f2501298SSatish Balay } 577f2501298SSatish Balay } 5781073c447SSatish Balay PetscFree(rows); 579f2501298SSatish Balay 5806d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5816d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 582f2501298SSatish Balay 583f2501298SSatish Balay if (B != PETSC_NULL) { 584f2501298SSatish Balay *B = C; 585f2501298SSatish Balay } else { 586f2501298SSatish Balay /* This isn't really an in-place transpose */ 587f2501298SSatish Balay PetscFree(a->a); 588f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 589f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 590f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 591f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 592f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 593f2501298SSatish Balay PetscFree(a); 594f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 595f2501298SSatish Balay PetscHeaderDestroy(C); 596f2501298SSatish Balay } 597f2501298SSatish Balay return 0; 598f2501298SSatish Balay } 599f2501298SSatish Balay 600f2501298SSatish Balay 6015615d1e5SSatish Balay #undef __FUNC__ 6025615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 603584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 604584200bdSSatish Balay { 605584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 606584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 607a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 608*d402145bSBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax; 609584200bdSSatish Balay Scalar *aa = a->a, *ap; 610584200bdSSatish Balay 6116d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 612584200bdSSatish Balay 613*d402145bSBarry Smith rmax = ailen[0]; 614584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 615584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 616584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 617*d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 618584200bdSSatish Balay if (fshift) { 619a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 620584200bdSSatish Balay N = ailen[i]; 621584200bdSSatish Balay for ( j=0; j<N; j++ ) { 622584200bdSSatish Balay ip[j-fshift] = ip[j]; 6237e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 624584200bdSSatish Balay } 625584200bdSSatish Balay } 626584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 627584200bdSSatish Balay } 628584200bdSSatish Balay if (mbs) { 629584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 630584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 631584200bdSSatish Balay } 632584200bdSSatish Balay /* reset ilen and imax for each row */ 633584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 634584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 635584200bdSSatish Balay } 636a7c10996SSatish Balay a->nz = ai[mbs]; 637584200bdSSatish Balay 638584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 639584200bdSSatish Balay if (fshift && a->diag) { 640584200bdSSatish Balay PetscFree(a->diag); 641584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 642584200bdSSatish Balay a->diag = 0; 643584200bdSSatish Balay } 6443d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 645219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 6463d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 647584200bdSSatish Balay a->reallocs); 648*d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 649e2f3b5e9SSatish Balay a->reallocs = 0; 6504e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 6514e220ebcSLois Curfman McInnes 652584200bdSSatish Balay return 0; 653584200bdSSatish Balay } 654584200bdSSatish Balay 6555615d1e5SSatish Balay #undef __FUNC__ 6565615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 657b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 6582593348eSBarry Smith { 659b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6609df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 6612593348eSBarry Smith return 0; 6622593348eSBarry Smith } 6632593348eSBarry Smith 6645615d1e5SSatish Balay #undef __FUNC__ 6655615d1e5SSatish Balay #define __FUNC__ "MatDestroy_SeqBAIJ" 666b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 6672593348eSBarry Smith { 6682593348eSBarry Smith Mat A = (Mat) obj; 669b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 67090f02eecSBarry Smith int ierr; 6712593348eSBarry Smith 6722593348eSBarry Smith #if defined(PETSC_LOG) 6732593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 6742593348eSBarry Smith #endif 6752593348eSBarry Smith PetscFree(a->a); 6762593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 6772593348eSBarry Smith if (a->diag) PetscFree(a->diag); 6782593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 6792593348eSBarry Smith if (a->imax) PetscFree(a->imax); 6802593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 681de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 6822593348eSBarry Smith PetscFree(a); 68390f02eecSBarry Smith if (A->mapping) { 68490f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 68590f02eecSBarry Smith } 686f2655603SLois Curfman McInnes PLogObjectDestroy(A); 687f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 6882593348eSBarry Smith return 0; 6892593348eSBarry Smith } 6902593348eSBarry Smith 6915615d1e5SSatish Balay #undef __FUNC__ 6925615d1e5SSatish Balay #define __FUNC__ "MatSetOption_SeqBAIJ" 693b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 6942593348eSBarry Smith { 695b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6966d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 6976d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 6986d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 699219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 7006d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 7016d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 7026d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 703219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 7046d4a8577SBarry Smith op == MAT_SYMMETRIC || 7056d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 70690f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 70790f02eecSBarry Smith op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 70894a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 7096d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 710e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 7112593348eSBarry Smith else 712e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 7132593348eSBarry Smith return 0; 7142593348eSBarry Smith } 7152593348eSBarry Smith 7162593348eSBarry Smith 7172593348eSBarry Smith /* -------------------------------------------------------*/ 7182593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 7192593348eSBarry Smith /* -------------------------------------------------------*/ 720b6490206SBarry Smith #include "pinclude/plapack.h" 721b6490206SBarry Smith 7225615d1e5SSatish Balay #undef __FUNC__ 7235615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 72439b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 7252593348eSBarry Smith { 726b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 72739b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 728c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 7292593348eSBarry Smith 730c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 731c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 732b6490206SBarry Smith 733119a934aSSatish Balay idx = a->j; 734119a934aSSatish Balay v = a->a; 735119a934aSSatish Balay ii = a->i; 736119a934aSSatish Balay 737119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 738119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 739119a934aSSatish Balay sum = 0.0; 740119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 7411a6a6d98SBarry Smith z[i] = sum; 742119a934aSSatish Balay } 743c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 744c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 74539b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 74639b95ed1SBarry Smith return 0; 74739b95ed1SBarry Smith } 74839b95ed1SBarry Smith 7495615d1e5SSatish Balay #undef __FUNC__ 7505615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 75139b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 75239b95ed1SBarry Smith { 75339b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 75439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 75539b95ed1SBarry Smith register Scalar x1,x2; 75639b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 757c16cb8f2SBarry Smith int j,n; 75839b95ed1SBarry Smith 759c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 760c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 76139b95ed1SBarry Smith 76239b95ed1SBarry Smith idx = a->j; 76339b95ed1SBarry Smith v = a->a; 76439b95ed1SBarry Smith ii = a->i; 76539b95ed1SBarry Smith 766119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 767119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 768119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 769119a934aSSatish Balay for ( j=0; j<n; j++ ) { 770119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 771119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 772119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 773119a934aSSatish Balay v += 4; 774119a934aSSatish Balay } 7751a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 776119a934aSSatish Balay z += 2; 777119a934aSSatish Balay } 778c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 779c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 78039b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 78139b95ed1SBarry Smith return 0; 78239b95ed1SBarry Smith } 78339b95ed1SBarry Smith 7845615d1e5SSatish Balay #undef __FUNC__ 7855615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 78639b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 78739b95ed1SBarry Smith { 78839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 78939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 790c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 79139b95ed1SBarry Smith 792c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 793c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 79439b95ed1SBarry Smith 79539b95ed1SBarry Smith idx = a->j; 79639b95ed1SBarry Smith v = a->a; 79739b95ed1SBarry Smith ii = a->i; 79839b95ed1SBarry Smith 799119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 800119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 801119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 802119a934aSSatish Balay for ( j=0; j<n; j++ ) { 803119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 804119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 805119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 806119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 807119a934aSSatish Balay v += 9; 808119a934aSSatish Balay } 8091a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 810119a934aSSatish Balay z += 3; 811119a934aSSatish Balay } 812c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 813c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 81439b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 81539b95ed1SBarry Smith return 0; 81639b95ed1SBarry Smith } 81739b95ed1SBarry Smith 8185615d1e5SSatish Balay #undef __FUNC__ 8195615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 82039b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 82139b95ed1SBarry Smith { 82239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 82339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 82439b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 82539b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 826c16cb8f2SBarry Smith int j,n; 82739b95ed1SBarry Smith 828c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 829c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 83039b95ed1SBarry Smith 83139b95ed1SBarry Smith idx = a->j; 83239b95ed1SBarry Smith v = a->a; 83339b95ed1SBarry Smith ii = a->i; 83439b95ed1SBarry Smith 835119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 836119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 837119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 838119a934aSSatish Balay for ( j=0; j<n; j++ ) { 839119a934aSSatish Balay xb = x + 4*(*idx++); 840119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 841119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 842119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 843119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 844119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 845119a934aSSatish Balay v += 16; 846119a934aSSatish Balay } 8471a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 848119a934aSSatish Balay z += 4; 849119a934aSSatish Balay } 850c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 851c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 85239b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 85339b95ed1SBarry Smith return 0; 85439b95ed1SBarry Smith } 85539b95ed1SBarry Smith 8565615d1e5SSatish Balay #undef __FUNC__ 8575615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 85839b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 85939b95ed1SBarry Smith { 86039b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 86139b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 86239b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 863c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 86439b95ed1SBarry Smith 865c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 866c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 86739b95ed1SBarry Smith 86839b95ed1SBarry Smith idx = a->j; 86939b95ed1SBarry Smith v = a->a; 87039b95ed1SBarry Smith ii = a->i; 87139b95ed1SBarry Smith 872119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 873119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 874119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 875119a934aSSatish Balay for ( j=0; j<n; j++ ) { 876119a934aSSatish Balay xb = x + 5*(*idx++); 877119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 878119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 879119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 880119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 881119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 882119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 883119a934aSSatish Balay v += 25; 884119a934aSSatish Balay } 8851a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 886119a934aSSatish Balay z += 5; 887119a934aSSatish Balay } 888c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 889c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 89039b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 89139b95ed1SBarry Smith return 0; 89239b95ed1SBarry Smith } 89339b95ed1SBarry Smith 8945615d1e5SSatish Balay #undef __FUNC__ 8955615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 89639b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 89739b95ed1SBarry Smith { 89839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 89939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 900c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 901c16cb8f2SBarry Smith int _One = 1,ncols,k; 902c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 90339b95ed1SBarry Smith 904c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 905c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 90639b95ed1SBarry Smith 90739b95ed1SBarry Smith idx = a->j; 90839b95ed1SBarry Smith v = a->a; 90939b95ed1SBarry Smith ii = a->i; 91039b95ed1SBarry Smith 91139b95ed1SBarry Smith 912119a934aSSatish Balay if (!a->mult_work) { 9133b547af2SSatish Balay k = PetscMax(a->m,a->n); 9142b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 915119a934aSSatish Balay } 916119a934aSSatish Balay work = a->mult_work; 917119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 918119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 919119a934aSSatish Balay ncols = n*bs; 920119a934aSSatish Balay workt = work; 921119a934aSSatish Balay for ( j=0; j<n; j++ ) { 922119a934aSSatish Balay xb = x + bs*(*idx++); 923119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 924119a934aSSatish Balay workt += bs; 925119a934aSSatish Balay } 9261a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 927119a934aSSatish Balay v += n*bs2; 928119a934aSSatish Balay z += bs; 929119a934aSSatish Balay } 930c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 931c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 9321a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 933bb42667fSSatish Balay return 0; 934bb42667fSSatish Balay } 935bb42667fSSatish Balay 9365615d1e5SSatish Balay #undef __FUNC__ 9375615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 938f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 939f44d3a6dSSatish Balay { 940f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 941f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 942c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 943f44d3a6dSSatish Balay 944c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 945c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 946c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 947f44d3a6dSSatish Balay 948f44d3a6dSSatish Balay idx = a->j; 949f44d3a6dSSatish Balay v = a->a; 950f44d3a6dSSatish Balay ii = a->i; 951f44d3a6dSSatish Balay 952f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 953f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 954f44d3a6dSSatish Balay sum = y[i]; 955f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 956f44d3a6dSSatish Balay z[i] = sum; 957f44d3a6dSSatish Balay } 958c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 959c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 960c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 961f44d3a6dSSatish Balay PLogFlops(2*a->nz); 962f44d3a6dSSatish Balay return 0; 963f44d3a6dSSatish Balay } 964f44d3a6dSSatish Balay 9655615d1e5SSatish Balay #undef __FUNC__ 9665615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 967f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 968f44d3a6dSSatish Balay { 969f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 970f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 971f44d3a6dSSatish Balay register Scalar x1,x2; 972f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 973c16cb8f2SBarry Smith int j,n; 974f44d3a6dSSatish Balay 975c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 976c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 977c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 978f44d3a6dSSatish Balay 979f44d3a6dSSatish Balay idx = a->j; 980f44d3a6dSSatish Balay v = a->a; 981f44d3a6dSSatish Balay ii = a->i; 982f44d3a6dSSatish Balay 983f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 984f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 985f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 986f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 987f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 988f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 989f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 990f44d3a6dSSatish Balay v += 4; 991f44d3a6dSSatish Balay } 992f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 993f44d3a6dSSatish Balay z += 2; y += 2; 994f44d3a6dSSatish Balay } 995c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 996c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 997c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 998f44d3a6dSSatish Balay PLogFlops(4*a->nz); 999f44d3a6dSSatish Balay return 0; 1000f44d3a6dSSatish Balay } 1001f44d3a6dSSatish Balay 10025615d1e5SSatish Balay #undef __FUNC__ 10035615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1004f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1005f44d3a6dSSatish Balay { 1006f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1007f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1008c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1009f44d3a6dSSatish Balay 1010c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1011c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1012c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1013f44d3a6dSSatish Balay 1014f44d3a6dSSatish Balay idx = a->j; 1015f44d3a6dSSatish Balay v = a->a; 1016f44d3a6dSSatish Balay ii = a->i; 1017f44d3a6dSSatish Balay 1018f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1019f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1020f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1021f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1022f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1023f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1024f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1025f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1026f44d3a6dSSatish Balay v += 9; 1027f44d3a6dSSatish Balay } 1028f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1029f44d3a6dSSatish Balay z += 3; y += 3; 1030f44d3a6dSSatish Balay } 1031c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1032c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1033c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1034f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1035f44d3a6dSSatish Balay return 0; 1036f44d3a6dSSatish Balay } 1037f44d3a6dSSatish Balay 10385615d1e5SSatish Balay #undef __FUNC__ 10395615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1040f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1041f44d3a6dSSatish Balay { 1042f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1043f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1044f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 1045f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1046c16cb8f2SBarry Smith int j,n; 1047f44d3a6dSSatish Balay 1048c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1049c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1050c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1051f44d3a6dSSatish Balay 1052f44d3a6dSSatish Balay idx = a->j; 1053f44d3a6dSSatish Balay v = a->a; 1054f44d3a6dSSatish Balay ii = a->i; 1055f44d3a6dSSatish Balay 1056f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1057f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1058f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1059f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1060f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1061f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1062f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1063f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1064f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1065f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1066f44d3a6dSSatish Balay v += 16; 1067f44d3a6dSSatish Balay } 1068f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1069f44d3a6dSSatish Balay z += 4; y += 4; 1070f44d3a6dSSatish Balay } 1071c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1072c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1073c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1074f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1075f44d3a6dSSatish Balay return 0; 1076f44d3a6dSSatish Balay } 1077f44d3a6dSSatish Balay 10785615d1e5SSatish Balay #undef __FUNC__ 10795615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1080f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1081f44d3a6dSSatish Balay { 1082f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1083f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1084f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1085c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1086f44d3a6dSSatish Balay 1087c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1088c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1089c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1090f44d3a6dSSatish Balay 1091f44d3a6dSSatish Balay idx = a->j; 1092f44d3a6dSSatish Balay v = a->a; 1093f44d3a6dSSatish Balay ii = a->i; 1094f44d3a6dSSatish Balay 1095f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1096f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1097f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1098f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1099f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1100f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1101f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1102f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1103f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1104f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1105f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1106f44d3a6dSSatish Balay v += 25; 1107f44d3a6dSSatish Balay } 1108f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1109f44d3a6dSSatish Balay z += 5; y += 5; 1110f44d3a6dSSatish Balay } 1111c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1112c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1113c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1114f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1115f44d3a6dSSatish Balay return 0; 1116f44d3a6dSSatish Balay } 1117f44d3a6dSSatish Balay 11185615d1e5SSatish Balay #undef __FUNC__ 11195615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1120f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1121f44d3a6dSSatish Balay { 1122f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1123f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1124f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1125f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1126f44d3a6dSSatish Balay 1127f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1128f44d3a6dSSatish Balay 1129c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1130c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1131f44d3a6dSSatish Balay 1132f44d3a6dSSatish Balay idx = a->j; 1133f44d3a6dSSatish Balay v = a->a; 1134f44d3a6dSSatish Balay ii = a->i; 1135f44d3a6dSSatish Balay 1136f44d3a6dSSatish Balay 1137f44d3a6dSSatish Balay if (!a->mult_work) { 1138f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1139f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1140f44d3a6dSSatish Balay } 1141f44d3a6dSSatish Balay work = a->mult_work; 1142f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1143f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1144f44d3a6dSSatish Balay ncols = n*bs; 1145f44d3a6dSSatish Balay workt = work; 1146f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1147f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1148f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1149f44d3a6dSSatish Balay workt += bs; 1150f44d3a6dSSatish Balay } 1151f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1152f44d3a6dSSatish Balay v += n*bs2; 1153f44d3a6dSSatish Balay z += bs; 1154f44d3a6dSSatish Balay } 1155c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1156c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1157f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1158f44d3a6dSSatish Balay return 0; 1159f44d3a6dSSatish Balay } 1160f44d3a6dSSatish Balay 11615615d1e5SSatish Balay #undef __FUNC__ 11625615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 11631a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1164bb42667fSSatish Balay { 1165bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 11661a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1167bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1168bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 11699df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1170119a934aSSatish Balay 1171119a934aSSatish Balay 117290f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 117390f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1174bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1175bb42667fSSatish Balay 1176119a934aSSatish Balay idx = a->j; 1177119a934aSSatish Balay v = a->a; 1178119a934aSSatish Balay ii = a->i; 1179119a934aSSatish Balay 1180119a934aSSatish Balay switch (bs) { 1181119a934aSSatish Balay case 1: 1182119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1183119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1184119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1185119a934aSSatish Balay ib = idx + ai[i]; 1186119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1187bb1453f0SSatish Balay rval = ib[j]; 1188bb1453f0SSatish Balay z[rval] += *v++ * x1; 1189119a934aSSatish Balay } 1190119a934aSSatish Balay } 1191119a934aSSatish Balay break; 1192119a934aSSatish Balay case 2: 1193119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1194119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1195119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1196119a934aSSatish Balay ib = idx + ai[i]; 1197119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1198119a934aSSatish Balay rval = ib[j]*2; 1199bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1200bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1201119a934aSSatish Balay v += 4; 1202119a934aSSatish Balay } 1203119a934aSSatish Balay } 1204119a934aSSatish Balay break; 1205119a934aSSatish Balay case 3: 1206119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1207119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1208119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1209119a934aSSatish Balay ib = idx + ai[i]; 1210119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1211119a934aSSatish Balay rval = ib[j]*3; 1212bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1213bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1214bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1215119a934aSSatish Balay v += 9; 1216119a934aSSatish Balay } 1217119a934aSSatish Balay } 1218119a934aSSatish Balay break; 1219119a934aSSatish Balay case 4: 1220119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1221119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1222119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1223119a934aSSatish Balay ib = idx + ai[i]; 1224119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1225119a934aSSatish Balay rval = ib[j]*4; 1226bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1227bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1228bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1229bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1230119a934aSSatish Balay v += 16; 1231119a934aSSatish Balay } 1232119a934aSSatish Balay } 1233119a934aSSatish Balay break; 1234119a934aSSatish Balay case 5: 1235119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1236119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1237119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1238119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1239119a934aSSatish Balay ib = idx + ai[i]; 1240119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1241119a934aSSatish Balay rval = ib[j]*5; 1242bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1243bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1244bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1245bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1246bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1247119a934aSSatish Balay v += 25; 1248119a934aSSatish Balay } 1249119a934aSSatish Balay } 1250119a934aSSatish Balay break; 1251119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1252119a934aSSatish Balay default: { 1253119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1254119a934aSSatish Balay if (!a->mult_work) { 12553b547af2SSatish Balay k = PetscMax(a->m,a->n); 1256bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1257119a934aSSatish Balay CHKPTRQ(a->mult_work); 1258119a934aSSatish Balay } 1259119a934aSSatish Balay work = a->mult_work; 1260119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1261119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1262119a934aSSatish Balay ncols = n*bs; 1263119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1264119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1265119a934aSSatish Balay v += n*bs2; 1266119a934aSSatish Balay x += bs; 1267119a934aSSatish Balay workt = work; 1268119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1269119a934aSSatish Balay zb = z + bs*(*idx++); 1270bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1271119a934aSSatish Balay workt += bs; 1272119a934aSSatish Balay } 1273119a934aSSatish Balay } 1274119a934aSSatish Balay } 1275119a934aSSatish Balay } 12760419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 12770419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1278faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1279faf6580fSSatish Balay return 0; 1280faf6580fSSatish Balay } 12811c351548SSatish Balay 12825615d1e5SSatish Balay #undef __FUNC__ 12835615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1284faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1285faf6580fSSatish Balay { 1286faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1287faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1288faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1289faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1290faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1291faf6580fSSatish Balay 1292faf6580fSSatish Balay 1293faf6580fSSatish Balay 129490f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 129590f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1296faf6580fSSatish Balay 1297648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1298648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1299648c1d95SSatish Balay 1300faf6580fSSatish Balay 1301faf6580fSSatish Balay idx = a->j; 1302faf6580fSSatish Balay v = a->a; 1303faf6580fSSatish Balay ii = a->i; 1304faf6580fSSatish Balay 1305faf6580fSSatish Balay switch (bs) { 1306faf6580fSSatish Balay case 1: 1307faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1308faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1309faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1310faf6580fSSatish Balay ib = idx + ai[i]; 1311faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1312faf6580fSSatish Balay rval = ib[j]; 1313faf6580fSSatish Balay z[rval] += *v++ * x1; 1314faf6580fSSatish Balay } 1315faf6580fSSatish Balay } 1316faf6580fSSatish Balay break; 1317faf6580fSSatish Balay case 2: 1318faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1319faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1320faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1321faf6580fSSatish Balay ib = idx + ai[i]; 1322faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1323faf6580fSSatish Balay rval = ib[j]*2; 1324faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1325faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1326faf6580fSSatish Balay v += 4; 1327faf6580fSSatish Balay } 1328faf6580fSSatish Balay } 1329faf6580fSSatish Balay break; 1330faf6580fSSatish Balay case 3: 1331faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1332faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1333faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1334faf6580fSSatish Balay ib = idx + ai[i]; 1335faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1336faf6580fSSatish Balay rval = ib[j]*3; 1337faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1338faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1339faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1340faf6580fSSatish Balay v += 9; 1341faf6580fSSatish Balay } 1342faf6580fSSatish Balay } 1343faf6580fSSatish Balay break; 1344faf6580fSSatish Balay case 4: 1345faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1346faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1347faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1348faf6580fSSatish Balay ib = idx + ai[i]; 1349faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1350faf6580fSSatish Balay rval = ib[j]*4; 1351faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1352faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1353faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1354faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1355faf6580fSSatish Balay v += 16; 1356faf6580fSSatish Balay } 1357faf6580fSSatish Balay } 1358faf6580fSSatish Balay break; 1359faf6580fSSatish Balay case 5: 1360faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1361faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1362faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1363faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1364faf6580fSSatish Balay ib = idx + ai[i]; 1365faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1366faf6580fSSatish Balay rval = ib[j]*5; 1367faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1368faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1369faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1370faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1371faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1372faf6580fSSatish Balay v += 25; 1373faf6580fSSatish Balay } 1374faf6580fSSatish Balay } 1375faf6580fSSatish Balay break; 1376faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1377faf6580fSSatish Balay default: { 1378faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1379faf6580fSSatish Balay if (!a->mult_work) { 1380faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1381faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1382faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1383faf6580fSSatish Balay } 1384faf6580fSSatish Balay work = a->mult_work; 1385faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1386faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1387faf6580fSSatish Balay ncols = n*bs; 1388faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1389faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1390faf6580fSSatish Balay v += n*bs2; 1391faf6580fSSatish Balay x += bs; 1392faf6580fSSatish Balay workt = work; 1393faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1394faf6580fSSatish Balay zb = z + bs*(*idx++); 1395faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1396faf6580fSSatish Balay workt += bs; 1397faf6580fSSatish Balay } 1398faf6580fSSatish Balay } 1399faf6580fSSatish Balay } 1400faf6580fSSatish Balay } 1401faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1402faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1403faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 14042593348eSBarry Smith return 0; 14052593348eSBarry Smith } 14062593348eSBarry Smith 14075615d1e5SSatish Balay #undef __FUNC__ 14085615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_SeqBAIJ" 14094e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 14102593348eSBarry Smith { 1411b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14124e220ebcSLois Curfman McInnes 14134e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 14144e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 14154e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 14164e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 14174e220ebcSLois Curfman McInnes info->block_size = a->bs2; 14184e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 14194e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 14204e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 14214e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 14224e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 14234e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 14244e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 14254e220ebcSLois Curfman McInnes info->memory = A->mem; 14264e220ebcSLois Curfman McInnes if (A->factor) { 14274e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 14284e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 14294e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 14304e220ebcSLois Curfman McInnes } else { 14314e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 14324e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14334e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14344e220ebcSLois Curfman McInnes } 14352593348eSBarry Smith return 0; 14362593348eSBarry Smith } 14372593348eSBarry Smith 14385615d1e5SSatish Balay #undef __FUNC__ 14395615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 144091d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 144191d316f6SSatish Balay { 144291d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 144391d316f6SSatish Balay 1444e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 144591d316f6SSatish Balay 144691d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 144791d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1448a7c10996SSatish Balay (a->nz != b->nz)) { 144991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 145091d316f6SSatish Balay } 145191d316f6SSatish Balay 145291d316f6SSatish Balay /* if the a->i are the same */ 145391d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 145491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 145591d316f6SSatish Balay } 145691d316f6SSatish Balay 145791d316f6SSatish Balay /* if a->j are the same */ 145891d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 145991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 146091d316f6SSatish Balay } 146191d316f6SSatish Balay 146291d316f6SSatish Balay /* if a->a are the same */ 146391d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 146491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 146591d316f6SSatish Balay } 146691d316f6SSatish Balay *flg = PETSC_TRUE; 146791d316f6SSatish Balay return 0; 146891d316f6SSatish Balay 146991d316f6SSatish Balay } 147091d316f6SSatish Balay 14715615d1e5SSatish Balay #undef __FUNC__ 14725615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 147391d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 147491d316f6SSatish Balay { 147591d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14767e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 147717e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 147817e48fc4SSatish Balay 14790513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 148017e48fc4SSatish Balay bs = a->bs; 148117e48fc4SSatish Balay aa = a->a; 148217e48fc4SSatish Balay ai = a->i; 148317e48fc4SSatish Balay aj = a->j; 148417e48fc4SSatish Balay ambs = a->mbs; 14859df24120SSatish Balay bs2 = a->bs2; 148691d316f6SSatish Balay 148791d316f6SSatish Balay VecSet(&zero,v); 148890f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1489e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 149017e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 149117e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 149217e48fc4SSatish Balay if (aj[j] == i) { 149317e48fc4SSatish Balay row = i*bs; 14947e67e3f9SSatish Balay aa_j = aa+j*bs2; 14957e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 149691d316f6SSatish Balay break; 149791d316f6SSatish Balay } 149891d316f6SSatish Balay } 149991d316f6SSatish Balay } 150091d316f6SSatish Balay return 0; 150191d316f6SSatish Balay } 150291d316f6SSatish Balay 15035615d1e5SSatish Balay #undef __FUNC__ 15045615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 15059867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 15069867e207SSatish Balay { 15079867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 15089867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 15097e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 15109867e207SSatish Balay 15119867e207SSatish Balay ai = a->i; 15129867e207SSatish Balay aj = a->j; 15139867e207SSatish Balay aa = a->a; 15149867e207SSatish Balay m = a->m; 15159867e207SSatish Balay n = a->n; 15169867e207SSatish Balay bs = a->bs; 15179867e207SSatish Balay mbs = a->mbs; 15189df24120SSatish Balay bs2 = a->bs2; 15199867e207SSatish Balay if (ll) { 152090f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1521e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 15229867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 15239867e207SSatish Balay M = ai[i+1] - ai[i]; 15249867e207SSatish Balay li = l + i*bs; 15257e67e3f9SSatish Balay v = aa + bs2*ai[i]; 15269867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 15277e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 15289867e207SSatish Balay (*v++) *= li[k%bs]; 15299867e207SSatish Balay } 15309867e207SSatish Balay } 15319867e207SSatish Balay } 15329867e207SSatish Balay } 15339867e207SSatish Balay 15349867e207SSatish Balay if (rr) { 153590f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1536e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 15379867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 15389867e207SSatish Balay M = ai[i+1] - ai[i]; 15397e67e3f9SSatish Balay v = aa + bs2*ai[i]; 15409867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 15419867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 15429867e207SSatish Balay for ( k=0; k<bs; k++ ) { 15439867e207SSatish Balay x = ri[k]; 15449867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 15459867e207SSatish Balay } 15469867e207SSatish Balay } 15479867e207SSatish Balay } 15489867e207SSatish Balay } 15499867e207SSatish Balay return 0; 15509867e207SSatish Balay } 15519867e207SSatish Balay 15529867e207SSatish Balay 1553b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1554b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 15552a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1556736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1557736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 15581a6a6d98SBarry Smith 15591a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 15601a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 15611a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 15621a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 15631a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 15641a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 15651a6a6d98SBarry Smith 15667fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 15677fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 15687fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 15697fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 15707fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 15717fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 15722593348eSBarry Smith 15735615d1e5SSatish Balay #undef __FUNC__ 15745615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 1575b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 15762593348eSBarry Smith { 1577b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 15782593348eSBarry Smith Scalar *v = a->a; 15792593348eSBarry Smith double sum = 0.0; 15809df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 15812593348eSBarry Smith 15822593348eSBarry Smith if (type == NORM_FROBENIUS) { 15830419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 15842593348eSBarry Smith #if defined(PETSC_COMPLEX) 15852593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 15862593348eSBarry Smith #else 15872593348eSBarry Smith sum += (*v)*(*v); v++; 15882593348eSBarry Smith #endif 15892593348eSBarry Smith } 15902593348eSBarry Smith *norm = sqrt(sum); 15912593348eSBarry Smith } 15922593348eSBarry Smith else { 1593e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 15942593348eSBarry Smith } 15952593348eSBarry Smith return 0; 15962593348eSBarry Smith } 15972593348eSBarry Smith 15982593348eSBarry Smith /* 15992593348eSBarry Smith note: This can only work for identity for row and col. It would 16002593348eSBarry Smith be good to check this and otherwise generate an error. 16012593348eSBarry Smith */ 16025615d1e5SSatish Balay #undef __FUNC__ 16035615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 1604b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 16052593348eSBarry Smith { 1606b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 16072593348eSBarry Smith Mat outA; 1608de6a44a3SBarry Smith int ierr; 16092593348eSBarry Smith 1610e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 16112593348eSBarry Smith 16122593348eSBarry Smith outA = inA; 16132593348eSBarry Smith inA->factor = FACTOR_LU; 16142593348eSBarry Smith a->row = row; 16152593348eSBarry Smith a->col = col; 16162593348eSBarry Smith 1617de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 16182593348eSBarry Smith 16192593348eSBarry Smith if (!a->diag) { 1620b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 16212593348eSBarry Smith } 16227fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 16232593348eSBarry Smith return 0; 16242593348eSBarry Smith } 16252593348eSBarry Smith 16265615d1e5SSatish Balay #undef __FUNC__ 16275615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 1628b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 16292593348eSBarry Smith { 1630b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 16319df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1632b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1633b6490206SBarry Smith PLogFlops(totalnz); 16342593348eSBarry Smith return 0; 16352593348eSBarry Smith } 16362593348eSBarry Smith 16375615d1e5SSatish Balay #undef __FUNC__ 16385615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 16397e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 16407e67e3f9SSatish Balay { 16417e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16427e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1643a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 16449df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 16457e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 16467e67e3f9SSatish Balay 16477e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 16487e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1649e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1650e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1651a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 16527e67e3f9SSatish Balay nrow = ailen[brow]; 16537e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1654e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1655e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1656a7c10996SSatish Balay col = in[l] ; 16577e67e3f9SSatish Balay bcol = col/bs; 16587e67e3f9SSatish Balay cidx = col%bs; 16597e67e3f9SSatish Balay ridx = row%bs; 16607e67e3f9SSatish Balay high = nrow; 16617e67e3f9SSatish Balay low = 0; /* assume unsorted */ 16627e67e3f9SSatish Balay while (high-low > 5) { 16637e67e3f9SSatish Balay t = (low+high)/2; 16647e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 16657e67e3f9SSatish Balay else low = t; 16667e67e3f9SSatish Balay } 16677e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 16687e67e3f9SSatish Balay if (rp[i] > bcol) break; 16697e67e3f9SSatish Balay if (rp[i] == bcol) { 16707e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 16717e67e3f9SSatish Balay goto finished; 16727e67e3f9SSatish Balay } 16737e67e3f9SSatish Balay } 16747e67e3f9SSatish Balay *v++ = zero; 16757e67e3f9SSatish Balay finished:; 16767e67e3f9SSatish Balay } 16777e67e3f9SSatish Balay } 16787e67e3f9SSatish Balay return 0; 16797e67e3f9SSatish Balay } 16807e67e3f9SSatish Balay 16815615d1e5SSatish Balay #undef __FUNC__ 16825615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 16835a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 16845a838052SSatish Balay { 16855a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 16865a838052SSatish Balay *bs = baij->bs; 16875a838052SSatish Balay return 0; 16885a838052SSatish Balay } 16895a838052SSatish Balay 1690d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 16915615d1e5SSatish Balay #undef __FUNC__ 16925615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1693d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1694d9b7c43dSSatish Balay { 1695d9b7c43dSSatish Balay int i,row; 1696d9b7c43dSSatish Balay row = idx[0]; 1697d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1698d9b7c43dSSatish Balay 1699d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1700d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1701d9b7c43dSSatish Balay } 1702d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1703d9b7c43dSSatish Balay return 0; 1704d9b7c43dSSatish Balay } 1705d9b7c43dSSatish Balay 17065615d1e5SSatish Balay #undef __FUNC__ 17075615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 1708d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1709d9b7c43dSSatish Balay { 1710d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1711d9b7c43dSSatish Balay IS is_local; 1712d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1713d9b7c43dSSatish Balay PetscTruth flg; 1714d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1715d9b7c43dSSatish Balay 1716d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1717d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1718d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1719537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1720d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1721d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1722d9b7c43dSSatish Balay 1723d9b7c43dSSatish Balay i = 0; 1724d9b7c43dSSatish Balay while (i < is_n) { 1725e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1726d9b7c43dSSatish Balay flg = PETSC_FALSE; 1727d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1728d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1729d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1730d9b7c43dSSatish Balay i += bs; 1731d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1732d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1733d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1734d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1735d9b7c43dSSatish Balay aa[0] = zero; 1736d9b7c43dSSatish Balay aa+=bs; 1737d9b7c43dSSatish Balay } 1738d9b7c43dSSatish Balay i++; 1739d9b7c43dSSatish Balay } 1740d9b7c43dSSatish Balay } 1741d9b7c43dSSatish Balay if (diag) { 1742d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1743d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1744d9b7c43dSSatish Balay } 1745d9b7c43dSSatish Balay } 1746d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1747d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1748d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 17499a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1750d9b7c43dSSatish Balay 1751d9b7c43dSSatish Balay return 0; 1752d9b7c43dSSatish Balay } 17531c351548SSatish Balay 17545615d1e5SSatish Balay #undef __FUNC__ 17555615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1756ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1757ba4ca20aSSatish Balay { 1758ba4ca20aSSatish Balay static int called = 0; 1759ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1760ba4ca20aSSatish Balay 1761ba4ca20aSSatish Balay if (called) return 0; else called = 1; 1762ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1763ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 1764ba4ca20aSSatish Balay return 0; 1765ba4ca20aSSatish Balay } 1766d9b7c43dSSatish Balay 17672593348eSBarry Smith /* -------------------------------------------------------------------*/ 1768cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 17699867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1770f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1771faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 17721a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 1773b6490206SBarry Smith 0,0, 1774de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1775b6490206SBarry Smith 0, 1776f2501298SSatish Balay MatTranspose_SeqBAIJ, 177717e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 17789867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1779584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1780b6490206SBarry Smith 0, 1781d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 17827fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1783b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1784de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1785*d402145bSBarry Smith 0,0, 1786b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1787b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1788af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 17897e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 1790ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 17913b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 17923b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 17933b2fbd54SBarry Smith MatRestoreRowIJ_SeqBAIJ}; 17942593348eSBarry Smith 17955615d1e5SSatish Balay #undef __FUNC__ 17965615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 17972593348eSBarry Smith /*@C 179844cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 179944cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 180044cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 18012bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 18022bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 18032593348eSBarry Smith 18042593348eSBarry Smith Input Parameters: 18052593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1806b6490206SBarry Smith . bs - size of block 18072593348eSBarry Smith . m - number of rows 18082593348eSBarry Smith . n - number of columns 1809b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 18102bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 18112bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 18122593348eSBarry Smith 18132593348eSBarry Smith Output Parameter: 18142593348eSBarry Smith . A - the matrix 18152593348eSBarry Smith 18160513a670SBarry Smith Options Database Keys: 18170513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 18180513a670SBarry Smith $ block calculations (much solver) 18190513a670SBarry Smith $ -mat_block_size - size of the blocks to use 18200513a670SBarry Smith 18212593348eSBarry Smith Notes: 182244cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 18232593348eSBarry Smith storage. That is, the stored row and column indices can begin at 182444cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 18252593348eSBarry Smith 18262593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 18272593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 18282593348eSBarry Smith allocation. For additional details, see the users manual chapter on 18296da5968aSLois Curfman McInnes matrices. 18302593348eSBarry Smith 183144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 18322593348eSBarry Smith @*/ 1833b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 18342593348eSBarry Smith { 18352593348eSBarry Smith Mat B; 1836b6490206SBarry Smith Mat_SeqBAIJ *b; 18373b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 18383b2fbd54SBarry Smith 18393b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 1840e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 1841b6490206SBarry Smith 18420513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 18430513a670SBarry Smith 1844f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1845e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 18462593348eSBarry Smith 18472593348eSBarry Smith *A = 0; 1848b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 18492593348eSBarry Smith PLogObjectCreate(B); 1850b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 185144cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 18522593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 18531a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 18541a6a6d98SBarry Smith if (!flg) { 18557fc0212eSBarry Smith switch (bs) { 18567fc0212eSBarry Smith case 1: 18577fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18581a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 185939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 1860f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 18617fc0212eSBarry Smith break; 18624eeb42bcSBarry Smith case 2: 18634eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 18641a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 186539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 1866f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 18676d84be18SBarry Smith break; 1868f327dd97SBarry Smith case 3: 1869f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 18701a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 187139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 1872f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 18734eeb42bcSBarry Smith break; 18746d84be18SBarry Smith case 4: 18756d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 18761a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 187739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 1878f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 18796d84be18SBarry Smith break; 18806d84be18SBarry Smith case 5: 18816d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 18821a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 188339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 1884f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 18856d84be18SBarry Smith break; 18867fc0212eSBarry Smith } 18871a6a6d98SBarry Smith } 1888b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1889b6490206SBarry Smith B->view = MatView_SeqBAIJ; 18902593348eSBarry Smith B->factor = 0; 18912593348eSBarry Smith B->lupivotthreshold = 1.0; 189290f02eecSBarry Smith B->mapping = 0; 18932593348eSBarry Smith b->row = 0; 18942593348eSBarry Smith b->col = 0; 18952593348eSBarry Smith b->reallocs = 0; 18962593348eSBarry Smith 189744cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 189844cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1899b6490206SBarry Smith b->mbs = mbs; 1900f2501298SSatish Balay b->nbs = nbs; 1901b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 19022593348eSBarry Smith if (nnz == PETSC_NULL) { 1903119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 19042593348eSBarry Smith else if (nz <= 0) nz = 1; 1905b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1906b6490206SBarry Smith nz = nz*mbs; 19072593348eSBarry Smith } 19082593348eSBarry Smith else { 19092593348eSBarry Smith nz = 0; 1910b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 19112593348eSBarry Smith } 19122593348eSBarry Smith 19132593348eSBarry Smith /* allocate the matrix space */ 19147e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 19152593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 19167e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 19177e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 19182593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 19192593348eSBarry Smith b->i = b->j + nz; 19202593348eSBarry Smith b->singlemalloc = 1; 19212593348eSBarry Smith 1922de6a44a3SBarry Smith b->i[0] = 0; 1923b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 19242593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 19252593348eSBarry Smith } 19262593348eSBarry Smith 1927b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1928b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1929b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1930b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 19312593348eSBarry Smith 1932b6490206SBarry Smith b->bs = bs; 19339df24120SSatish Balay b->bs2 = bs2; 1934b6490206SBarry Smith b->mbs = mbs; 19352593348eSBarry Smith b->nz = 0; 193618e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 19372593348eSBarry Smith b->sorted = 0; 19382593348eSBarry Smith b->roworiented = 1; 19392593348eSBarry Smith b->nonew = 0; 19402593348eSBarry Smith b->diag = 0; 19412593348eSBarry Smith b->solve_work = 0; 1942de6a44a3SBarry Smith b->mult_work = 0; 19432593348eSBarry Smith b->spptr = 0; 19444e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 19454e220ebcSLois Curfman McInnes 19462593348eSBarry Smith *A = B; 19472593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 19482593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 19492593348eSBarry Smith return 0; 19502593348eSBarry Smith } 19512593348eSBarry Smith 19525615d1e5SSatish Balay #undef __FUNC__ 19535615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 1954b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 19552593348eSBarry Smith { 19562593348eSBarry Smith Mat C; 1957b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 19589df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1959de6a44a3SBarry Smith 1960e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 19612593348eSBarry Smith 19622593348eSBarry Smith *B = 0; 1963b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 19642593348eSBarry Smith PLogObjectCreate(C); 1965b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 19662593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1967b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1968b6490206SBarry Smith C->view = MatView_SeqBAIJ; 19692593348eSBarry Smith C->factor = A->factor; 19702593348eSBarry Smith c->row = 0; 19712593348eSBarry Smith c->col = 0; 19722593348eSBarry Smith C->assembled = PETSC_TRUE; 19732593348eSBarry Smith 197444cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 197544cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 197644cd7ae7SLois Curfman McInnes C->M = a->m; 197744cd7ae7SLois Curfman McInnes C->N = a->n; 197844cd7ae7SLois Curfman McInnes 1979b6490206SBarry Smith c->bs = a->bs; 19809df24120SSatish Balay c->bs2 = a->bs2; 1981b6490206SBarry Smith c->mbs = a->mbs; 1982b6490206SBarry Smith c->nbs = a->nbs; 19832593348eSBarry Smith 1984b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1985b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1986b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 19872593348eSBarry Smith c->imax[i] = a->imax[i]; 19882593348eSBarry Smith c->ilen[i] = a->ilen[i]; 19892593348eSBarry Smith } 19902593348eSBarry Smith 19912593348eSBarry Smith /* allocate the matrix space */ 19922593348eSBarry Smith c->singlemalloc = 1; 19937e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 19942593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 19957e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1996de6a44a3SBarry Smith c->i = c->j + nz; 1997b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1998b6490206SBarry Smith if (mbs > 0) { 1999de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 20002593348eSBarry Smith if (cpvalues == COPY_VALUES) { 20017e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 20022593348eSBarry Smith } 20032593348eSBarry Smith } 20042593348eSBarry Smith 2005b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 20062593348eSBarry Smith c->sorted = a->sorted; 20072593348eSBarry Smith c->roworiented = a->roworiented; 20082593348eSBarry Smith c->nonew = a->nonew; 20092593348eSBarry Smith 20102593348eSBarry Smith if (a->diag) { 2011b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2012b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2013b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 20142593348eSBarry Smith c->diag[i] = a->diag[i]; 20152593348eSBarry Smith } 20162593348eSBarry Smith } 20172593348eSBarry Smith else c->diag = 0; 20182593348eSBarry Smith c->nz = a->nz; 20192593348eSBarry Smith c->maxnz = a->maxnz; 20202593348eSBarry Smith c->solve_work = 0; 20212593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 20227fc0212eSBarry Smith c->mult_work = 0; 20232593348eSBarry Smith *B = C; 20242593348eSBarry Smith return 0; 20252593348eSBarry Smith } 20262593348eSBarry Smith 20275615d1e5SSatish Balay #undef __FUNC__ 20285615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 202919bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 20302593348eSBarry Smith { 2031b6490206SBarry Smith Mat_SeqBAIJ *a; 20322593348eSBarry Smith Mat B; 2033de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2034b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 203535aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2036a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2037b6490206SBarry Smith Scalar *aa; 203819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 20392593348eSBarry Smith 20401a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2041de6a44a3SBarry Smith bs2 = bs*bs; 2042b6490206SBarry Smith 20432593348eSBarry Smith MPI_Comm_size(comm,&size); 2044e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 204590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 204677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2047e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 20482593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 20492593348eSBarry Smith 2050e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 205135aab85fSBarry Smith 205235aab85fSBarry Smith /* 205335aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 205435aab85fSBarry Smith divisible by the blocksize 205535aab85fSBarry Smith */ 2056b6490206SBarry Smith mbs = M/bs; 205735aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 205835aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 205935aab85fSBarry Smith else mbs++; 206035aab85fSBarry Smith if (extra_rows) { 2061537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 206235aab85fSBarry Smith } 2063b6490206SBarry Smith 20642593348eSBarry Smith /* read in row lengths */ 206535aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 206677c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 206735aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 20682593348eSBarry Smith 2069b6490206SBarry Smith /* read in column indices */ 207035aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 207177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 207235aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2073b6490206SBarry Smith 2074b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2075b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2076b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 207735aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 207835aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 207935aab85fSBarry Smith masked = mask + mbs; 2080b6490206SBarry Smith rowcount = 0; nzcount = 0; 2081b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 208235aab85fSBarry Smith nmask = 0; 2083b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2084b6490206SBarry Smith kmax = rowlengths[rowcount]; 2085b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 208635aab85fSBarry Smith tmp = jj[nzcount++]/bs; 208735aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2088b6490206SBarry Smith } 2089b6490206SBarry Smith rowcount++; 2090b6490206SBarry Smith } 209135aab85fSBarry Smith browlengths[i] += nmask; 209235aab85fSBarry Smith /* zero out the mask elements we set */ 209335aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2094b6490206SBarry Smith } 2095b6490206SBarry Smith 20962593348eSBarry Smith /* create our matrix */ 209735aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 209835aab85fSBarry Smith CHKERRQ(ierr); 20992593348eSBarry Smith B = *A; 2100b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 21012593348eSBarry Smith 21022593348eSBarry Smith /* set matrix "i" values */ 2103de6a44a3SBarry Smith a->i[0] = 0; 2104b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2105b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2106b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 21072593348eSBarry Smith } 2108b6490206SBarry Smith a->nz = 0; 2109b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 21102593348eSBarry Smith 2111b6490206SBarry Smith /* read in nonzero values */ 211235aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 211377c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 211435aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2115b6490206SBarry Smith 2116b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2117b6490206SBarry Smith nzcount = 0; jcount = 0; 2118b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2119b6490206SBarry Smith nzcountb = nzcount; 212035aab85fSBarry Smith nmask = 0; 2121b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2122b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2123b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 212435aab85fSBarry Smith tmp = jj[nzcount++]/bs; 212535aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2126b6490206SBarry Smith } 2127b6490206SBarry Smith rowcount++; 2128b6490206SBarry Smith } 2129de6a44a3SBarry Smith /* sort the masked values */ 213077c4ece6SBarry Smith PetscSortInt(nmask,masked); 2131de6a44a3SBarry Smith 2132b6490206SBarry Smith /* set "j" values into matrix */ 2133b6490206SBarry Smith maskcount = 1; 213435aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 213535aab85fSBarry Smith a->j[jcount++] = masked[j]; 2136de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2137b6490206SBarry Smith } 2138b6490206SBarry Smith /* set "a" values into matrix */ 2139de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2140b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2141b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2142b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2143de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2144de6a44a3SBarry Smith block = mask[tmp] - 1; 2145de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2146de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2147b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2148b6490206SBarry Smith } 2149b6490206SBarry Smith } 215035aab85fSBarry Smith /* zero out the mask elements we set */ 215135aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2152b6490206SBarry Smith } 2153e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2154b6490206SBarry Smith 2155b6490206SBarry Smith PetscFree(rowlengths); 2156b6490206SBarry Smith PetscFree(browlengths); 2157b6490206SBarry Smith PetscFree(aa); 2158b6490206SBarry Smith PetscFree(jj); 2159b6490206SBarry Smith PetscFree(mask); 2160b6490206SBarry Smith 2161b6490206SBarry Smith B->assembled = PETSC_TRUE; 2162de6a44a3SBarry Smith 2163de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2164de6a44a3SBarry Smith if (flg) { 216519bcc07fSBarry Smith Viewer tviewer; 216619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2167639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 216819bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 216919bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2170de6a44a3SBarry Smith } 2171de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2172de6a44a3SBarry Smith if (flg) { 217319bcc07fSBarry Smith Viewer tviewer; 217419bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2175639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 217619bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 217719bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2178de6a44a3SBarry Smith } 2179de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2180de6a44a3SBarry Smith if (flg) { 218119bcc07fSBarry Smith Viewer tviewer; 218219bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 218319bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 218419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2185de6a44a3SBarry Smith } 2186de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2187de6a44a3SBarry Smith if (flg) { 218819bcc07fSBarry Smith Viewer tviewer; 218919bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2190639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 219119bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 219219bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2193de6a44a3SBarry Smith } 2194de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2195de6a44a3SBarry Smith if (flg) { 219619bcc07fSBarry Smith Viewer tviewer; 219719bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 219819bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 219919bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 220019bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2201de6a44a3SBarry Smith } 22022593348eSBarry Smith return 0; 22032593348eSBarry Smith } 22042593348eSBarry Smith 22052593348eSBarry Smith 22062593348eSBarry Smith 2207