12593348eSBarry Smith #ifndef lint 2*eed86810SBarry Smith static char vcid[] = "$Id: baij.c,v 1.103 1997/06/03 20:26:37 balay Exp bsmith $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 93369ce9aSBarry Smith 103369ce9aSBarry Smith #include "pinclude/pviewer.h" 113369ce9aSBarry Smith #include "sys.h" 1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 131a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 141a6a6d98SBarry Smith #include "src/inline/spops.h" 152593348eSBarry Smith #include "petsc.h" 163b547af2SSatish Balay 172593348eSBarry Smith 18de6a44a3SBarry Smith /* 19de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 20de6a44a3SBarry Smith */ 21de6a44a3SBarry Smith 225615d1e5SSatish Balay #undef __FUNC__ 235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 25de6a44a3SBarry Smith { 26de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 277fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 28de6a44a3SBarry Smith 29de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 317fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 32de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33de6a44a3SBarry Smith if (a->j[j] == i) { 34de6a44a3SBarry Smith diag[i] = j; 35de6a44a3SBarry Smith break; 36de6a44a3SBarry Smith } 37de6a44a3SBarry Smith } 38de6a44a3SBarry Smith } 39de6a44a3SBarry Smith a->diag = diag; 40de6a44a3SBarry Smith return 0; 41de6a44a3SBarry Smith } 422593348eSBarry Smith 432593348eSBarry Smith 443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 453b2fbd54SBarry Smith 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 503b2fbd54SBarry Smith { 513b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 523b2fbd54SBarry Smith int ierr, n = a->mbs,i; 533b2fbd54SBarry Smith 543b2fbd54SBarry Smith *nn = n; 553b2fbd54SBarry Smith if (!ia) return 0; 563b2fbd54SBarry Smith if (symmetric) { 573b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 583b2fbd54SBarry Smith } else if (oshift == 1) { 593b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 603b2fbd54SBarry Smith int nz = a->i[n] + 1; 613b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 623b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 633b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 643b2fbd54SBarry Smith } else { 653b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 663b2fbd54SBarry Smith } 673b2fbd54SBarry Smith 683b2fbd54SBarry Smith return 0; 693b2fbd54SBarry Smith } 703b2fbd54SBarry Smith 715615d1e5SSatish Balay #undef __FUNC__ 725eea60f9SBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 743b2fbd54SBarry Smith PetscTruth *done) 753b2fbd54SBarry Smith { 763b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773b2fbd54SBarry Smith int i,n = a->mbs; 783b2fbd54SBarry Smith 793b2fbd54SBarry Smith if (!ia) return 0; 803b2fbd54SBarry Smith if (symmetric) { 813b2fbd54SBarry Smith PetscFree(*ia); 823b2fbd54SBarry Smith PetscFree(*ja); 83af5da2bfSBarry Smith } else if (oshift == 1) { 843b2fbd54SBarry Smith int nz = a->i[n]; 853b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 863b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 873b2fbd54SBarry Smith } 883b2fbd54SBarry Smith return 0; 893b2fbd54SBarry Smith } 903b2fbd54SBarry Smith 913b2fbd54SBarry Smith 925615d1e5SSatish Balay #undef __FUNC__ 935eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 952593348eSBarry Smith { 96b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 979df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98b6490206SBarry Smith Scalar *aa; 992593348eSBarry Smith 10090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1012593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1022593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1033b2fbd54SBarry Smith 1042593348eSBarry Smith col_lens[1] = a->m; 1052593348eSBarry Smith col_lens[2] = a->n; 1067e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1072593348eSBarry Smith 1082593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 109b6490206SBarry Smith count = 0; 110b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 111b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 112b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113b6490206SBarry Smith } 1142593348eSBarry Smith } 11577c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1162593348eSBarry Smith PetscFree(col_lens); 1172593348eSBarry Smith 1182593348eSBarry Smith /* store column indices (zero start index) */ 11966963ce1SSatish Balay jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120b6490206SBarry Smith count = 0; 121b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 122b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 123b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 125b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1262593348eSBarry Smith } 1272593348eSBarry Smith } 128b6490206SBarry Smith } 129b6490206SBarry Smith } 1307e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131b6490206SBarry Smith PetscFree(jj); 1322593348eSBarry Smith 1332593348eSBarry Smith /* store nonzero values */ 13466963ce1SSatish Balay aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135b6490206SBarry Smith count = 0; 136b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 137b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 138b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1407e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 141b6490206SBarry Smith } 142b6490206SBarry Smith } 143b6490206SBarry Smith } 144b6490206SBarry Smith } 1457e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146b6490206SBarry Smith PetscFree(aa); 1472593348eSBarry Smith return 0; 1482593348eSBarry Smith } 1492593348eSBarry Smith 1505615d1e5SSatish Balay #undef __FUNC__ 1515eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1532593348eSBarry Smith { 154b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1559df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1562593348eSBarry Smith FILE *fd; 1572593348eSBarry Smith char *outputname; 1582593348eSBarry Smith 15990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1602593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 162639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1637ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1642593348eSBarry Smith } 165639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1672593348eSBarry Smith } 168639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 16944cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17044cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17144cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17244cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17344cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 17544cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17744cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 17844cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 17944cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18044cd7ae7SLois Curfman McInnes #else 18144cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18344cd7ae7SLois Curfman McInnes #endif 18444cd7ae7SLois Curfman McInnes } 18544cd7ae7SLois Curfman McInnes } 18644cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes } 1902593348eSBarry Smith else { 191b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 192b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 193b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 194b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1977e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 19888685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 1997e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20088685aaeSLois Curfman McInnes } 20188685aaeSLois Curfman McInnes else { 2027e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 20488685aaeSLois Curfman McInnes #else 2057e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 20688685aaeSLois Curfman McInnes #endif 2072593348eSBarry Smith } 2082593348eSBarry Smith } 2092593348eSBarry Smith fprintf(fd,"\n"); 2102593348eSBarry Smith } 2112593348eSBarry Smith } 212b6490206SBarry Smith } 2132593348eSBarry Smith fflush(fd); 2142593348eSBarry Smith return 0; 2152593348eSBarry Smith } 2162593348eSBarry Smith 2175615d1e5SSatish Balay #undef __FUNC__ 2185eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 2193270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2203270192aSSatish Balay { 2213270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2223270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2233270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2243270192aSSatish Balay Scalar *aa; 2253270192aSSatish Balay Draw draw; 2263270192aSSatish Balay DrawButton button; 2273270192aSSatish Balay PetscTruth isnull; 2283270192aSSatish Balay 2293270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2303270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2313270192aSSatish Balay 2323270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2333270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2343270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2353270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2363270192aSSatish Balay color = DRAW_BLUE; 2373270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2383270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2393270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2403270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2413270192aSSatish Balay aa = a->a + j*bs2; 2423270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2433270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2443270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 245b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2463270192aSSatish Balay } 2473270192aSSatish Balay } 2483270192aSSatish Balay } 2493270192aSSatish Balay } 2503270192aSSatish Balay color = DRAW_CYAN; 2513270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2523270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2533270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2543270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2553270192aSSatish Balay aa = a->a + j*bs2; 2563270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2573270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2583270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 259b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2603270192aSSatish Balay } 2613270192aSSatish Balay } 2623270192aSSatish Balay } 2633270192aSSatish Balay } 2643270192aSSatish Balay 2653270192aSSatish Balay color = DRAW_RED; 2663270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2673270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2683270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2693270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2703270192aSSatish Balay aa = a->a + j*bs2; 2713270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2723270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2733270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 274b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2753270192aSSatish Balay } 2763270192aSSatish Balay } 2773270192aSSatish Balay } 2783270192aSSatish Balay } 2793270192aSSatish Balay 2803270192aSSatish Balay DrawFlush(draw); 2813270192aSSatish Balay DrawGetPause(draw,&pause); 2823270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2833270192aSSatish Balay 2843270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2853270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2863270192aSSatish Balay while (button != BUTTON_RIGHT) { 2873270192aSSatish Balay DrawClear(draw); 2883270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2893270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2903270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2913270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2923270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 2933270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 2943270192aSSatish Balay w *= scale; h *= scale; 2953270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2963270192aSSatish Balay color = DRAW_BLUE; 2973270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2983270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2993270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3003270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3013270192aSSatish Balay aa = a->a + j*bs2; 3023270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3033270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3043270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 305b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3063270192aSSatish Balay } 3073270192aSSatish Balay } 3083270192aSSatish Balay } 3093270192aSSatish Balay } 3103270192aSSatish Balay color = DRAW_CYAN; 3113270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3123270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3133270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3143270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3153270192aSSatish Balay aa = a->a + j*bs2; 3163270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3173270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3183270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 319b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3203270192aSSatish Balay } 3213270192aSSatish Balay } 3223270192aSSatish Balay } 3233270192aSSatish Balay } 3243270192aSSatish Balay 3253270192aSSatish Balay color = DRAW_RED; 3263270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3273270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3283270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3293270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3303270192aSSatish Balay aa = a->a + j*bs2; 3313270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3323270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3333270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 334b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3353270192aSSatish Balay } 3363270192aSSatish Balay } 3373270192aSSatish Balay } 3383270192aSSatish Balay } 3393270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3403270192aSSatish Balay } 3413270192aSSatish Balay return 0; 3423270192aSSatish Balay } 3433270192aSSatish Balay 3445615d1e5SSatish Balay #undef __FUNC__ 3455eea60f9SBarry Smith #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 3468f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3472593348eSBarry Smith { 3482593348eSBarry Smith Mat A = (Mat) obj; 34919bcc07fSBarry Smith ViewerType vtype; 35019bcc07fSBarry Smith int ierr; 3512593348eSBarry Smith 35219bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 35319bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 354e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 3552593348eSBarry Smith } 35619bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3582593348eSBarry Smith } 35919bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 360b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3612593348eSBarry Smith } 36219bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3633270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3642593348eSBarry Smith } 3652593348eSBarry Smith return 0; 3662593348eSBarry Smith } 367b6490206SBarry Smith 368119a934aSSatish Balay #define CHUNKSIZE 10 369cd0e1443SSatish Balay 3705615d1e5SSatish Balay #undef __FUNC__ 3715615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 372639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373cd0e1443SSatish Balay { 374cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3789df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 379cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 380cd0e1443SSatish Balay 381cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 382cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3833b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 384e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 385e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3863b2fbd54SBarry Smith #endif 3872c3acbe9SBarry Smith rp = aj + ai[brow]; 3882c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 3892c3acbe9SBarry Smith rmax = imax[brow]; 3902c3acbe9SBarry Smith nrow = ailen[brow]; 391cd0e1443SSatish Balay low = 0; 392cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 3933b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 394e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 3963b2fbd54SBarry Smith #endif 397a7c10996SSatish Balay col = in[l]; bcol = col/bs; 398cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 399cd0e1443SSatish Balay if (roworiented) { 400cd0e1443SSatish Balay value = *v++; 401cd0e1443SSatish Balay } 402cd0e1443SSatish Balay else { 403cd0e1443SSatish Balay value = v[k + l*m]; 404cd0e1443SSatish Balay } 405cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4062c3acbe9SBarry Smith while (high-low > 7) { 407cd0e1443SSatish Balay t = (low+high)/2; 408cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 409cd0e1443SSatish Balay else low = t; 410cd0e1443SSatish Balay } 411cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 412cd0e1443SSatish Balay if (rp[i] > bcol) break; 413cd0e1443SSatish Balay if (rp[i] == bcol) { 4147e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 415cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 416cd0e1443SSatish Balay else *bap = value; 417f1241b54SBarry Smith goto noinsert1; 418cd0e1443SSatish Balay } 419cd0e1443SSatish Balay } 420c2653b3dSLois Curfman McInnes if (nonew == 1) goto noinsert1; 42111ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 422cd0e1443SSatish Balay if (nrow >= rmax) { 423cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 424cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 425cd0e1443SSatish Balay Scalar *new_a; 426cd0e1443SSatish Balay 42711ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 42896854ed6SLois Curfman McInnes 42996854ed6SLois Curfman McInnes /* Malloc new storage space */ 4307e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 431cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4327e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 433cd0e1443SSatish Balay new_i = new_j + new_nz; 434cd0e1443SSatish Balay 435cd0e1443SSatish Balay /* copy over old data into new slots */ 436cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4377e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 438a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 439a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 440a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 441cd0e1443SSatish Balay len*sizeof(int)); 442a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 443a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 444a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 445a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 446cd0e1443SSatish Balay /* free up old matrix storage */ 447cd0e1443SSatish Balay PetscFree(a->a); 448cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 449cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 450cd0e1443SSatish Balay a->singlemalloc = 1; 451cd0e1443SSatish Balay 452a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 453cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4547e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 45518e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 456cd0e1443SSatish Balay a->reallocs++; 457119a934aSSatish Balay a->nz++; 458cd0e1443SSatish Balay } 4597e67e3f9SSatish Balay N = nrow++ - 1; 460cd0e1443SSatish Balay /* shift up all the later entries in this row */ 461cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 462cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4637e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 464cd0e1443SSatish Balay } 4657e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 466cd0e1443SSatish Balay rp[i] = bcol; 4677e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 468f1241b54SBarry Smith noinsert1:; 469cd0e1443SSatish Balay low = i; 470cd0e1443SSatish Balay } 471cd0e1443SSatish Balay ailen[brow] = nrow; 472cd0e1443SSatish Balay } 473cd0e1443SSatish Balay return 0; 474cd0e1443SSatish Balay } 475cd0e1443SSatish Balay 4765615d1e5SSatish Balay #undef __FUNC__ 47705a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 47892c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 47992c4ed94SBarry Smith { 48092c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4818a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 4820e324ae4SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 4830e324ae4SSatish Balay int roworiented=a->roworiented; 4848a84c255SSatish Balay int *aj=a->j,nonew=a->nonew; 4850e324ae4SSatish Balay int bs2=a->bs2,bs=a->bs,stepval; 4860e324ae4SSatish Balay Scalar *ap,*value=v,*aa=a->a,*bap; 48792c4ed94SBarry Smith 4880e324ae4SSatish Balay if (roworiented) { 4890e324ae4SSatish Balay stepval = (n-1)*bs; 4900e324ae4SSatish Balay } else { 4910e324ae4SSatish Balay stepval = (m-1)*bs; 4920e324ae4SSatish Balay } 49392c4ed94SBarry Smith for ( k=0; k<m; k++ ) { /* loop over added rows */ 49492c4ed94SBarry Smith row = im[k]; 49592c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 49692c4ed94SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 4978a84c255SSatish Balay if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 49892c4ed94SBarry Smith #endif 49992c4ed94SBarry Smith rp = aj + ai[row]; 50092c4ed94SBarry Smith ap = aa + bs2*ai[row]; 50192c4ed94SBarry Smith rmax = imax[row]; 50292c4ed94SBarry Smith nrow = ailen[row]; 50392c4ed94SBarry Smith low = 0; 50492c4ed94SBarry Smith for ( l=0; l<n; l++ ) { /* loop over added columns */ 50592c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 50692c4ed94SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 5078a84c255SSatish Balay if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 50892c4ed94SBarry Smith #endif 50992c4ed94SBarry Smith col = in[l]; 51092c4ed94SBarry Smith if (roworiented) { 5110e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 5120e324ae4SSatish Balay } else { 5130e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 51492c4ed94SBarry Smith } 51592c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 51692c4ed94SBarry Smith while (high-low > 7) { 51792c4ed94SBarry Smith t = (low+high)/2; 51892c4ed94SBarry Smith if (rp[t] > col) high = t; 51992c4ed94SBarry Smith else low = t; 52092c4ed94SBarry Smith } 52192c4ed94SBarry Smith for ( i=low; i<high; i++ ) { 52292c4ed94SBarry Smith if (rp[i] > col) break; 52392c4ed94SBarry Smith if (rp[i] == col) { 5248a84c255SSatish Balay bap = ap + bs2*i; 5250e324ae4SSatish Balay if (roworiented) { 5268a84c255SSatish Balay if (is == ADD_VALUES) { 5270e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval ) 5288a84c255SSatish Balay for (jj=ii; jj<bs2; jj+=bs ) 5298a84c255SSatish Balay bap[jj] += *value++; 5300e324ae4SSatish Balay } else { 5310e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval ) 5320e324ae4SSatish Balay for (jj=ii; jj<bs2; jj+=bs ) 5330e324ae4SSatish Balay bap[jj] = *value++; 5348a84c255SSatish Balay } 5350e324ae4SSatish Balay } else { 5360e324ae4SSatish Balay if (is == ADD_VALUES) { 5370e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval ) 5380e324ae4SSatish Balay for (jj=0; jj<bs; jj++ ) 5390e324ae4SSatish Balay *bap++ += *value++; 5400e324ae4SSatish Balay } else { 5410e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval ) 5420e324ae4SSatish Balay for (jj=0; jj<bs; jj++ ) 5430e324ae4SSatish Balay *bap++ = *value++; 5440e324ae4SSatish Balay } 5458a84c255SSatish Balay } 546f1241b54SBarry Smith goto noinsert2; 54792c4ed94SBarry Smith } 54892c4ed94SBarry Smith } 54989280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 55011ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 55192c4ed94SBarry Smith if (nrow >= rmax) { 55292c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 55392c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 55492c4ed94SBarry Smith Scalar *new_a; 55592c4ed94SBarry Smith 55611ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 55789280ab3SLois Curfman McInnes 55892c4ed94SBarry Smith /* malloc new storage space */ 55992c4ed94SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 56092c4ed94SBarry Smith new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 56192c4ed94SBarry Smith new_j = (int *) (new_a + bs2*new_nz); 56292c4ed94SBarry Smith new_i = new_j + new_nz; 56392c4ed94SBarry Smith 56492c4ed94SBarry Smith /* copy over old data into new slots */ 56592c4ed94SBarry Smith for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 56692c4ed94SBarry Smith for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 56792c4ed94SBarry Smith PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 56892c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 56992c4ed94SBarry Smith PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 57092c4ed94SBarry Smith len*sizeof(int)); 57192c4ed94SBarry Smith PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 57292c4ed94SBarry Smith PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 57392c4ed94SBarry Smith PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 57492c4ed94SBarry Smith aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 57592c4ed94SBarry Smith /* free up old matrix storage */ 57692c4ed94SBarry Smith PetscFree(a->a); 57792c4ed94SBarry Smith if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 57892c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 57992c4ed94SBarry Smith a->singlemalloc = 1; 58092c4ed94SBarry Smith 58192c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 58292c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 58392c4ed94SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 58492c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 58592c4ed94SBarry Smith a->reallocs++; 58692c4ed94SBarry Smith a->nz++; 58792c4ed94SBarry Smith } 58892c4ed94SBarry Smith N = nrow++ - 1; 58992c4ed94SBarry Smith /* shift up all the later entries in this row */ 59092c4ed94SBarry Smith for ( ii=N; ii>=i; ii-- ) { 59192c4ed94SBarry Smith rp[ii+1] = rp[ii]; 59292c4ed94SBarry Smith PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 59392c4ed94SBarry Smith } 59492c4ed94SBarry Smith if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 59592c4ed94SBarry Smith rp[i] = col; 5968a84c255SSatish Balay bap = ap + bs2*i; 5970e324ae4SSatish Balay if (roworiented) { 5980e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval) 5998a84c255SSatish Balay for (jj=ii; jj<bs2; jj+=bs ) 6000e324ae4SSatish Balay bap[jj] = *value++; 6010e324ae4SSatish Balay } else { 6020e324ae4SSatish Balay for ( ii=0; ii<bs; ii++,value+=stepval ) 6030e324ae4SSatish Balay for (jj=0; jj<bs; jj++ ) 6040e324ae4SSatish Balay *bap++ = *value++; 6050e324ae4SSatish Balay } 606f1241b54SBarry Smith noinsert2:; 60792c4ed94SBarry Smith low = i; 60892c4ed94SBarry Smith } 60992c4ed94SBarry Smith ailen[row] = nrow; 61092c4ed94SBarry Smith } 61192c4ed94SBarry Smith return 0; 61292c4ed94SBarry Smith } 61392c4ed94SBarry Smith 61492c4ed94SBarry Smith #undef __FUNC__ 6155eea60f9SBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 6168f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 6170b824a48SSatish Balay { 6180b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6190b824a48SSatish Balay *m = a->m; *n = a->n; 6200b824a48SSatish Balay return 0; 6210b824a48SSatish Balay } 6220b824a48SSatish Balay 6235615d1e5SSatish Balay #undef __FUNC__ 6245eea60f9SBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 6258f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 6260b824a48SSatish Balay { 6270b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6280b824a48SSatish Balay *m = 0; *n = a->m; 6290b824a48SSatish Balay return 0; 6300b824a48SSatish Balay } 6310b824a48SSatish Balay 6325615d1e5SSatish Balay #undef __FUNC__ 6335615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 6349867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6359867e207SSatish Balay { 6369867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6377e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 6389867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 6399867e207SSatish Balay 6409867e207SSatish Balay bs = a->bs; 6419867e207SSatish Balay ai = a->i; 6429867e207SSatish Balay aj = a->j; 6439867e207SSatish Balay aa = a->a; 6449df24120SSatish Balay bs2 = a->bs2; 6459867e207SSatish Balay 646e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 6479867e207SSatish Balay 6489867e207SSatish Balay bn = row/bs; /* Block number */ 6499867e207SSatish Balay bp = row % bs; /* Block Position */ 6509867e207SSatish Balay M = ai[bn+1] - ai[bn]; 6519867e207SSatish Balay *nz = bs*M; 6529867e207SSatish Balay 6539867e207SSatish Balay if (v) { 6549867e207SSatish Balay *v = 0; 6559867e207SSatish Balay if (*nz) { 6569867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 6579867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6589867e207SSatish Balay v_i = *v + i*bs; 6597e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 6607e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 6619867e207SSatish Balay } 6629867e207SSatish Balay } 6639867e207SSatish Balay } 6649867e207SSatish Balay 6659867e207SSatish Balay if (idx) { 6669867e207SSatish Balay *idx = 0; 6679867e207SSatish Balay if (*nz) { 6689867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 6699867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6709867e207SSatish Balay idx_i = *idx + i*bs; 6719867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 6729867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 6739867e207SSatish Balay } 6749867e207SSatish Balay } 6759867e207SSatish Balay } 6769867e207SSatish Balay return 0; 6779867e207SSatish Balay } 6789867e207SSatish Balay 6795615d1e5SSatish Balay #undef __FUNC__ 6805eea60f9SBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 6819867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6829867e207SSatish Balay { 6839867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 6849867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 6859867e207SSatish Balay return 0; 6869867e207SSatish Balay } 687b6490206SBarry Smith 6885615d1e5SSatish Balay #undef __FUNC__ 6895615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 6908f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B) 691f2501298SSatish Balay { 692f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 693f2501298SSatish Balay Mat C; 694f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 6959df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 696f2501298SSatish Balay Scalar *array=a->a; 697f2501298SSatish Balay 698f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 699e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 700f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 701f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 702a7c10996SSatish Balay 703a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 704f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 705f2501298SSatish Balay PetscFree(col); 706f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 707f2501298SSatish Balay cols = rows + bs; 708f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 709f2501298SSatish Balay cols[0] = i*bs; 710f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 711f2501298SSatish Balay len = ai[i+1] - ai[i]; 712f2501298SSatish Balay for ( j=0; j<len; j++ ) { 713f2501298SSatish Balay rows[0] = (*aj++)*bs; 714f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 715f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 716f2501298SSatish Balay array += bs2; 717f2501298SSatish Balay } 718f2501298SSatish Balay } 7191073c447SSatish Balay PetscFree(rows); 720f2501298SSatish Balay 7216d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7226d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 723f2501298SSatish Balay 724f2501298SSatish Balay if (B != PETSC_NULL) { 725f2501298SSatish Balay *B = C; 726f2501298SSatish Balay } else { 727f2501298SSatish Balay /* This isn't really an in-place transpose */ 728f2501298SSatish Balay PetscFree(a->a); 729f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 730f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 731f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 732f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 733f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 734f2501298SSatish Balay PetscFree(a); 735f09e8eb9SSatish Balay PetscMemcpy(A,C,sizeof(struct _p_Mat)); 736f2501298SSatish Balay PetscHeaderDestroy(C); 737f2501298SSatish Balay } 738f2501298SSatish Balay return 0; 739f2501298SSatish Balay } 740f2501298SSatish Balay 741f2501298SSatish Balay 7425615d1e5SSatish Balay #undef __FUNC__ 7435615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 7448f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 745584200bdSSatish Balay { 746584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 747584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 748a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 749d402145bSBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax; 750584200bdSSatish Balay Scalar *aa = a->a, *ap; 751584200bdSSatish Balay 7526d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 753584200bdSSatish Balay 754d402145bSBarry Smith rmax = ailen[0]; 755584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 756584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 757584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 758d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 759584200bdSSatish Balay if (fshift) { 760a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 761584200bdSSatish Balay N = ailen[i]; 762584200bdSSatish Balay for ( j=0; j<N; j++ ) { 763584200bdSSatish Balay ip[j-fshift] = ip[j]; 7647e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 765584200bdSSatish Balay } 766584200bdSSatish Balay } 767584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 768584200bdSSatish Balay } 769584200bdSSatish Balay if (mbs) { 770584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 771584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 772584200bdSSatish Balay } 773584200bdSSatish Balay /* reset ilen and imax for each row */ 774584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 775584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 776584200bdSSatish Balay } 777a7c10996SSatish Balay a->nz = ai[mbs]; 778584200bdSSatish Balay 779584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 780584200bdSSatish Balay if (fshift && a->diag) { 781584200bdSSatish Balay PetscFree(a->diag); 782584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 783584200bdSSatish Balay a->diag = 0; 784584200bdSSatish Balay } 7853d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 786219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 7873d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 788584200bdSSatish Balay a->reallocs); 789d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 790e2f3b5e9SSatish Balay a->reallocs = 0; 7914e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 7924e220ebcSLois Curfman McInnes 793584200bdSSatish Balay return 0; 794584200bdSSatish Balay } 795584200bdSSatish Balay 7965615d1e5SSatish Balay #undef __FUNC__ 7975615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 7988f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A) 7992593348eSBarry Smith { 800b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8019df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 8022593348eSBarry Smith return 0; 8032593348eSBarry Smith } 8042593348eSBarry Smith 8055615d1e5SSatish Balay #undef __FUNC__ 8065eea60f9SBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 807b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 8082593348eSBarry Smith { 8092593348eSBarry Smith Mat A = (Mat) obj; 810b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8112593348eSBarry Smith 8122593348eSBarry Smith #if defined(PETSC_LOG) 8132593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 8142593348eSBarry Smith #endif 8152593348eSBarry Smith PetscFree(a->a); 8162593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 8172593348eSBarry Smith if (a->diag) PetscFree(a->diag); 8182593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 8192593348eSBarry Smith if (a->imax) PetscFree(a->imax); 8202593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 821de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 8222593348eSBarry Smith PetscFree(a); 823f2655603SLois Curfman McInnes PLogObjectDestroy(A); 824f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 8252593348eSBarry Smith return 0; 8262593348eSBarry Smith } 8272593348eSBarry Smith 8285615d1e5SSatish Balay #undef __FUNC__ 8295eea60f9SBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 8308f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op) 8312593348eSBarry Smith { 832b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8336d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 8346d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 8356d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 836219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 8376d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 838c2653b3dSLois Curfman McInnes else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 83996854ed6SLois Curfman McInnes else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 8406d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 8416d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 842219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 8436d4a8577SBarry Smith op == MAT_SYMMETRIC || 8446d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 84590f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 8462b362799SSatish Balay op == MAT_IGNORE_OFF_PROC_ENTRIES) 84794a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 8486d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 849e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 8502593348eSBarry Smith else 851e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 8522593348eSBarry Smith return 0; 8532593348eSBarry Smith } 8542593348eSBarry Smith 8552593348eSBarry Smith 8562593348eSBarry Smith /* -------------------------------------------------------*/ 8572593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 8582593348eSBarry Smith /* -------------------------------------------------------*/ 859b6490206SBarry Smith #include "pinclude/plapack.h" 860b6490206SBarry Smith 8615615d1e5SSatish Balay #undef __FUNC__ 8625615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 86339b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 8642593348eSBarry Smith { 865b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 86639b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 867c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 8682593348eSBarry Smith 869c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 870c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 871b6490206SBarry Smith 872119a934aSSatish Balay idx = a->j; 873119a934aSSatish Balay v = a->a; 874119a934aSSatish Balay ii = a->i; 875119a934aSSatish Balay 876119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 877119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 878119a934aSSatish Balay sum = 0.0; 879119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 8801a6a6d98SBarry Smith z[i] = sum; 881119a934aSSatish Balay } 882c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 883c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 88439b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 88539b95ed1SBarry Smith return 0; 88639b95ed1SBarry Smith } 88739b95ed1SBarry Smith 8885615d1e5SSatish Balay #undef __FUNC__ 8895615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 89039b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 89139b95ed1SBarry Smith { 89239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 89339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 89439b95ed1SBarry Smith register Scalar x1,x2; 89539b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 896c16cb8f2SBarry Smith int j,n; 89739b95ed1SBarry Smith 898c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 899c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 90039b95ed1SBarry Smith 90139b95ed1SBarry Smith idx = a->j; 90239b95ed1SBarry Smith v = a->a; 90339b95ed1SBarry Smith ii = a->i; 90439b95ed1SBarry Smith 905119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 906119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 907119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 908119a934aSSatish Balay for ( j=0; j<n; j++ ) { 909119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 910119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 911119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 912119a934aSSatish Balay v += 4; 913119a934aSSatish Balay } 9141a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 915119a934aSSatish Balay z += 2; 916119a934aSSatish Balay } 917c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 918c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 91939b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 92039b95ed1SBarry Smith return 0; 92139b95ed1SBarry Smith } 92239b95ed1SBarry Smith 9235615d1e5SSatish Balay #undef __FUNC__ 9245615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 92539b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 92639b95ed1SBarry Smith { 92739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 92839b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 929c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 93039b95ed1SBarry Smith 931c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 932c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 93339b95ed1SBarry Smith 93439b95ed1SBarry Smith idx = a->j; 93539b95ed1SBarry Smith v = a->a; 93639b95ed1SBarry Smith ii = a->i; 93739b95ed1SBarry Smith 938119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 939119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 940119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 941119a934aSSatish Balay for ( j=0; j<n; j++ ) { 942119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 943119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 944119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 945119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 946119a934aSSatish Balay v += 9; 947119a934aSSatish Balay } 9481a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 949119a934aSSatish Balay z += 3; 950119a934aSSatish Balay } 951c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 952c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 95339b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 95439b95ed1SBarry Smith return 0; 95539b95ed1SBarry Smith } 95639b95ed1SBarry Smith 9575615d1e5SSatish Balay #undef __FUNC__ 9585615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 95939b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 96039b95ed1SBarry Smith { 96139b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 96239b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 96339b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 96439b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 965c16cb8f2SBarry Smith int j,n; 96639b95ed1SBarry Smith 967c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 968c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 96939b95ed1SBarry Smith 97039b95ed1SBarry Smith idx = a->j; 97139b95ed1SBarry Smith v = a->a; 97239b95ed1SBarry Smith ii = a->i; 97339b95ed1SBarry Smith 974119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 975119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 976119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 977119a934aSSatish Balay for ( j=0; j<n; j++ ) { 978119a934aSSatish Balay xb = x + 4*(*idx++); 979119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 980119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 981119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 982119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 983119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 984119a934aSSatish Balay v += 16; 985119a934aSSatish Balay } 9861a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 987119a934aSSatish Balay z += 4; 988119a934aSSatish Balay } 989c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 990c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 99139b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 99239b95ed1SBarry Smith return 0; 99339b95ed1SBarry Smith } 99439b95ed1SBarry Smith 9955615d1e5SSatish Balay #undef __FUNC__ 9965615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 99739b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 99839b95ed1SBarry Smith { 99939b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 100039b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 100139b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 1002c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 100339b95ed1SBarry Smith 1004c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1005c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 100639b95ed1SBarry Smith 100739b95ed1SBarry Smith idx = a->j; 100839b95ed1SBarry Smith v = a->a; 100939b95ed1SBarry Smith ii = a->i; 101039b95ed1SBarry Smith 1011119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1012119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1013119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1014119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1015119a934aSSatish Balay xb = x + 5*(*idx++); 1016119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1017119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1018119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1019119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1020119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1021119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1022119a934aSSatish Balay v += 25; 1023119a934aSSatish Balay } 10241a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1025119a934aSSatish Balay z += 5; 1026119a934aSSatish Balay } 1027c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1028c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 102939b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 103039b95ed1SBarry Smith return 0; 103139b95ed1SBarry Smith } 103239b95ed1SBarry Smith 10335615d1e5SSatish Balay #undef __FUNC__ 103448e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7" 103548e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 103648e9ddb2SSatish Balay { 103748e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103848e9ddb2SSatish Balay register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 103948e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 104048e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 104148e9ddb2SSatish Balay 104248e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 104348e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 104448e9ddb2SSatish Balay 104548e9ddb2SSatish Balay idx = a->j; 104648e9ddb2SSatish Balay v = a->a; 104748e9ddb2SSatish Balay ii = a->i; 104848e9ddb2SSatish Balay 104948e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 105048e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 105148e9ddb2SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 105248e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 105348e9ddb2SSatish Balay xb = x + 7*(*idx++); 105448e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 105548e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 105648e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 105748e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 105848e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 105948e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 106048e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 106148e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 106248e9ddb2SSatish Balay v += 49; 106348e9ddb2SSatish Balay } 106448e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 106548e9ddb2SSatish Balay z += 7; 106648e9ddb2SSatish Balay } 106748e9ddb2SSatish Balay 106848e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 106948e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 107048e9ddb2SSatish Balay PLogFlops(98*a->nz - a->m); 107148e9ddb2SSatish Balay return 0; 107248e9ddb2SSatish Balay } 107348e9ddb2SSatish Balay 107448e9ddb2SSatish Balay #undef __FUNC__ 10755615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 107639b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 107739b95ed1SBarry Smith { 107839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 107939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 1080c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1081c16cb8f2SBarry Smith int _One = 1,ncols,k; 1082c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 108339b95ed1SBarry Smith 1084c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1085c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 108639b95ed1SBarry Smith 108739b95ed1SBarry Smith idx = a->j; 108839b95ed1SBarry Smith v = a->a; 108939b95ed1SBarry Smith ii = a->i; 109039b95ed1SBarry Smith 109139b95ed1SBarry Smith 1092119a934aSSatish Balay if (!a->mult_work) { 10933b547af2SSatish Balay k = PetscMax(a->m,a->n); 10942b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1095119a934aSSatish Balay } 1096119a934aSSatish Balay work = a->mult_work; 1097119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1098119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1099119a934aSSatish Balay ncols = n*bs; 1100119a934aSSatish Balay workt = work; 1101119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1102119a934aSSatish Balay xb = x + bs*(*idx++); 1103119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1104119a934aSSatish Balay workt += bs; 1105119a934aSSatish Balay } 11061a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1107119a934aSSatish Balay v += n*bs2; 1108119a934aSSatish Balay z += bs; 1109119a934aSSatish Balay } 1110c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1111c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 11121a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 1113bb42667fSSatish Balay return 0; 1114bb42667fSSatish Balay } 1115bb42667fSSatish Balay 11165615d1e5SSatish Balay #undef __FUNC__ 11175615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1118f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1119f44d3a6dSSatish Balay { 1120f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1121f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 1122c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 1123f44d3a6dSSatish Balay 1124c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1125c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1126c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1127f44d3a6dSSatish Balay 1128f44d3a6dSSatish Balay idx = a->j; 1129f44d3a6dSSatish Balay v = a->a; 1130f44d3a6dSSatish Balay ii = a->i; 1131f44d3a6dSSatish Balay 1132f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1133f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1134f44d3a6dSSatish Balay sum = y[i]; 1135f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 1136f44d3a6dSSatish Balay z[i] = sum; 1137f44d3a6dSSatish Balay } 1138c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1139c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1140c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1141f44d3a6dSSatish Balay PLogFlops(2*a->nz); 1142f44d3a6dSSatish Balay return 0; 1143f44d3a6dSSatish Balay } 1144f44d3a6dSSatish Balay 11455615d1e5SSatish Balay #undef __FUNC__ 11465615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1147f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1148f44d3a6dSSatish Balay { 1149f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1150f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1151f44d3a6dSSatish Balay register Scalar x1,x2; 1152f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1153c16cb8f2SBarry Smith int j,n; 1154f44d3a6dSSatish Balay 1155c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1156c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1157c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1158f44d3a6dSSatish Balay 1159f44d3a6dSSatish Balay idx = a->j; 1160f44d3a6dSSatish Balay v = a->a; 1161f44d3a6dSSatish Balay ii = a->i; 1162f44d3a6dSSatish Balay 1163f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1164f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1165f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 1166f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1167f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1168f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 1169f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 1170f44d3a6dSSatish Balay v += 4; 1171f44d3a6dSSatish Balay } 1172f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 1173f44d3a6dSSatish Balay z += 2; y += 2; 1174f44d3a6dSSatish Balay } 1175c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1176c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1177c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1178f44d3a6dSSatish Balay PLogFlops(4*a->nz); 1179f44d3a6dSSatish Balay return 0; 1180f44d3a6dSSatish Balay } 1181f44d3a6dSSatish Balay 11825615d1e5SSatish Balay #undef __FUNC__ 11835615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1184f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1185f44d3a6dSSatish Balay { 1186f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1187f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1188c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1189f44d3a6dSSatish Balay 1190c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1191c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1192c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1193f44d3a6dSSatish Balay 1194f44d3a6dSSatish Balay idx = a->j; 1195f44d3a6dSSatish Balay v = a->a; 1196f44d3a6dSSatish Balay ii = a->i; 1197f44d3a6dSSatish Balay 1198f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1199f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1200f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1201f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1202f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1203f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1204f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1205f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1206f44d3a6dSSatish Balay v += 9; 1207f44d3a6dSSatish Balay } 1208f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1209f44d3a6dSSatish Balay z += 3; y += 3; 1210f44d3a6dSSatish Balay } 1211c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1212c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1213c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1214f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1215f44d3a6dSSatish Balay return 0; 1216f44d3a6dSSatish Balay } 1217f44d3a6dSSatish Balay 12185615d1e5SSatish Balay #undef __FUNC__ 12195615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1220f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1221f44d3a6dSSatish Balay { 1222f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1223f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1224f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 1225f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1226c16cb8f2SBarry Smith int j,n; 1227f44d3a6dSSatish Balay 1228c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1229c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1230c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1231f44d3a6dSSatish Balay 1232f44d3a6dSSatish Balay idx = a->j; 1233f44d3a6dSSatish Balay v = a->a; 1234f44d3a6dSSatish Balay ii = a->i; 1235f44d3a6dSSatish Balay 1236f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1237f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1238f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1239f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1240f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1241f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1242f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1243f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1244f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1245f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1246f44d3a6dSSatish Balay v += 16; 1247f44d3a6dSSatish Balay } 1248f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1249f44d3a6dSSatish Balay z += 4; y += 4; 1250f44d3a6dSSatish Balay } 1251c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1252c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1253c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1254f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1255f44d3a6dSSatish Balay return 0; 1256f44d3a6dSSatish Balay } 1257f44d3a6dSSatish Balay 12585615d1e5SSatish Balay #undef __FUNC__ 12595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1260f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1261f44d3a6dSSatish Balay { 1262f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1263f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1264f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1265c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1266f44d3a6dSSatish Balay 1267c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1268c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1269c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1270f44d3a6dSSatish Balay 1271f44d3a6dSSatish Balay idx = a->j; 1272f44d3a6dSSatish Balay v = a->a; 1273f44d3a6dSSatish Balay ii = a->i; 1274f44d3a6dSSatish Balay 1275f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1276f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1277f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1278f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1279f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1280f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1281f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1282f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1283f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1284f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1285f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1286f44d3a6dSSatish Balay v += 25; 1287f44d3a6dSSatish Balay } 1288f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1289f44d3a6dSSatish Balay z += 5; y += 5; 1290f44d3a6dSSatish Balay } 1291c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1292c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1293c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1294f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1295f44d3a6dSSatish Balay return 0; 1296f44d3a6dSSatish Balay } 1297f44d3a6dSSatish Balay 12985615d1e5SSatish Balay #undef __FUNC__ 129948e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 130048e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 130148e9ddb2SSatish Balay { 130248e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 130348e9ddb2SSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 130448e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 130548e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 130648e9ddb2SSatish Balay 130748e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 130848e9ddb2SSatish Balay VecGetArray_Fast(yy,y); 130948e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 131048e9ddb2SSatish Balay 131148e9ddb2SSatish Balay idx = a->j; 131248e9ddb2SSatish Balay v = a->a; 131348e9ddb2SSatish Balay ii = a->i; 131448e9ddb2SSatish Balay 131548e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 131648e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 131748e9ddb2SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 131848e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 131948e9ddb2SSatish Balay xb = x + 7*(*idx++); 132048e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 132148e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 132248e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 132348e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 132448e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 132548e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 132648e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 132748e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 132848e9ddb2SSatish Balay v += 49; 132948e9ddb2SSatish Balay } 133048e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 133148e9ddb2SSatish Balay z += 7; y += 7; 133248e9ddb2SSatish Balay } 133348e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 133448e9ddb2SSatish Balay VecRestoreArray_Fast(yy,y); 133548e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 133648e9ddb2SSatish Balay PLogFlops(98*a->nz); 133748e9ddb2SSatish Balay return 0; 133848e9ddb2SSatish Balay } 133948e9ddb2SSatish Balay 134048e9ddb2SSatish Balay 134148e9ddb2SSatish Balay #undef __FUNC__ 13425615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1343f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1344f44d3a6dSSatish Balay { 1345f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1346f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1347f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1348f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1349f44d3a6dSSatish Balay 1350f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1351f44d3a6dSSatish Balay 1352c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1353c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1354f44d3a6dSSatish Balay 1355f44d3a6dSSatish Balay idx = a->j; 1356f44d3a6dSSatish Balay v = a->a; 1357f44d3a6dSSatish Balay ii = a->i; 1358f44d3a6dSSatish Balay 1359f44d3a6dSSatish Balay 1360f44d3a6dSSatish Balay if (!a->mult_work) { 1361f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1362f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1363f44d3a6dSSatish Balay } 1364f44d3a6dSSatish Balay work = a->mult_work; 1365f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1366f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1367f44d3a6dSSatish Balay ncols = n*bs; 1368f44d3a6dSSatish Balay workt = work; 1369f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1370f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1371f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1372f44d3a6dSSatish Balay workt += bs; 1373f44d3a6dSSatish Balay } 1374f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1375f44d3a6dSSatish Balay v += n*bs2; 1376f44d3a6dSSatish Balay z += bs; 1377f44d3a6dSSatish Balay } 1378c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1379c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1380f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1381f44d3a6dSSatish Balay return 0; 1382f44d3a6dSSatish Balay } 1383f44d3a6dSSatish Balay 13845615d1e5SSatish Balay #undef __FUNC__ 13855615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 13868f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1387bb42667fSSatish Balay { 1388bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 13891a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1390bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1391bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 13929df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1393119a934aSSatish Balay 1394119a934aSSatish Balay 139590f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 139690f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1397bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1398bb42667fSSatish Balay 1399119a934aSSatish Balay idx = a->j; 1400119a934aSSatish Balay v = a->a; 1401119a934aSSatish Balay ii = a->i; 1402119a934aSSatish Balay 1403119a934aSSatish Balay switch (bs) { 1404119a934aSSatish Balay case 1: 1405119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1406119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1407119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1408119a934aSSatish Balay ib = idx + ai[i]; 1409119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1410bb1453f0SSatish Balay rval = ib[j]; 1411bb1453f0SSatish Balay z[rval] += *v++ * x1; 1412119a934aSSatish Balay } 1413119a934aSSatish Balay } 1414119a934aSSatish Balay break; 1415119a934aSSatish Balay case 2: 1416119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1417119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1418119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1419119a934aSSatish Balay ib = idx + ai[i]; 1420119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1421119a934aSSatish Balay rval = ib[j]*2; 1422bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1423bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1424119a934aSSatish Balay v += 4; 1425119a934aSSatish Balay } 1426119a934aSSatish Balay } 1427119a934aSSatish Balay break; 1428119a934aSSatish Balay case 3: 1429119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1430119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1431119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1432119a934aSSatish Balay ib = idx + ai[i]; 1433119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1434119a934aSSatish Balay rval = ib[j]*3; 1435bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1436bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1437bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1438119a934aSSatish Balay v += 9; 1439119a934aSSatish Balay } 1440119a934aSSatish Balay } 1441119a934aSSatish Balay break; 1442119a934aSSatish Balay case 4: 1443119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1444119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1445119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1446119a934aSSatish Balay ib = idx + ai[i]; 1447119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1448119a934aSSatish Balay rval = ib[j]*4; 1449bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1450bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1451bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1452bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1453119a934aSSatish Balay v += 16; 1454119a934aSSatish Balay } 1455119a934aSSatish Balay } 1456119a934aSSatish Balay break; 1457119a934aSSatish Balay case 5: 1458119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1459119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1460119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1461119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1462119a934aSSatish Balay ib = idx + ai[i]; 1463119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1464119a934aSSatish Balay rval = ib[j]*5; 1465bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1466bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1467bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1468bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1469bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1470119a934aSSatish Balay v += 25; 1471119a934aSSatish Balay } 1472119a934aSSatish Balay } 1473119a934aSSatish Balay break; 1474119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1475119a934aSSatish Balay default: { 1476119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1477119a934aSSatish Balay if (!a->mult_work) { 14783b547af2SSatish Balay k = PetscMax(a->m,a->n); 1479bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1480119a934aSSatish Balay CHKPTRQ(a->mult_work); 1481119a934aSSatish Balay } 1482119a934aSSatish Balay work = a->mult_work; 1483119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1484119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1485119a934aSSatish Balay ncols = n*bs; 1486119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1487119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1488119a934aSSatish Balay v += n*bs2; 1489119a934aSSatish Balay x += bs; 1490119a934aSSatish Balay workt = work; 1491119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1492119a934aSSatish Balay zb = z + bs*(*idx++); 1493bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1494119a934aSSatish Balay workt += bs; 1495119a934aSSatish Balay } 1496119a934aSSatish Balay } 1497119a934aSSatish Balay } 1498119a934aSSatish Balay } 14990419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 15000419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1501faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1502faf6580fSSatish Balay return 0; 1503faf6580fSSatish Balay } 15041c351548SSatish Balay 15055615d1e5SSatish Balay #undef __FUNC__ 15065615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 15078f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1508faf6580fSSatish Balay { 1509faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1510faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1511faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1512faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1513faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1514faf6580fSSatish Balay 1515faf6580fSSatish Balay 1516faf6580fSSatish Balay 151790f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 151890f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1519faf6580fSSatish Balay 1520648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1521648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1522648c1d95SSatish Balay 1523faf6580fSSatish Balay 1524faf6580fSSatish Balay idx = a->j; 1525faf6580fSSatish Balay v = a->a; 1526faf6580fSSatish Balay ii = a->i; 1527faf6580fSSatish Balay 1528faf6580fSSatish Balay switch (bs) { 1529faf6580fSSatish Balay case 1: 1530faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1531faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1532faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1533faf6580fSSatish Balay ib = idx + ai[i]; 1534faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1535faf6580fSSatish Balay rval = ib[j]; 1536faf6580fSSatish Balay z[rval] += *v++ * x1; 1537faf6580fSSatish Balay } 1538faf6580fSSatish Balay } 1539faf6580fSSatish Balay break; 1540faf6580fSSatish Balay case 2: 1541faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1542faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1543faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1544faf6580fSSatish Balay ib = idx + ai[i]; 1545faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1546faf6580fSSatish Balay rval = ib[j]*2; 1547faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1548faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1549faf6580fSSatish Balay v += 4; 1550faf6580fSSatish Balay } 1551faf6580fSSatish Balay } 1552faf6580fSSatish Balay break; 1553faf6580fSSatish Balay case 3: 1554faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1555faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1556faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1557faf6580fSSatish Balay ib = idx + ai[i]; 1558faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1559faf6580fSSatish Balay rval = ib[j]*3; 1560faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1561faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1562faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1563faf6580fSSatish Balay v += 9; 1564faf6580fSSatish Balay } 1565faf6580fSSatish Balay } 1566faf6580fSSatish Balay break; 1567faf6580fSSatish Balay case 4: 1568faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1569faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1570faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1571faf6580fSSatish Balay ib = idx + ai[i]; 1572faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1573faf6580fSSatish Balay rval = ib[j]*4; 1574faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1575faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1576faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1577faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1578faf6580fSSatish Balay v += 16; 1579faf6580fSSatish Balay } 1580faf6580fSSatish Balay } 1581faf6580fSSatish Balay break; 1582faf6580fSSatish Balay case 5: 1583faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1584faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1585faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1586faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1587faf6580fSSatish Balay ib = idx + ai[i]; 1588faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1589faf6580fSSatish Balay rval = ib[j]*5; 1590faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1591faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1592faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1593faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1594faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1595faf6580fSSatish Balay v += 25; 1596faf6580fSSatish Balay } 1597faf6580fSSatish Balay } 1598faf6580fSSatish Balay break; 1599faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1600faf6580fSSatish Balay default: { 1601faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1602faf6580fSSatish Balay if (!a->mult_work) { 1603faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1604faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1605faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1606faf6580fSSatish Balay } 1607faf6580fSSatish Balay work = a->mult_work; 1608faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1609faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1610faf6580fSSatish Balay ncols = n*bs; 1611faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1612faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1613faf6580fSSatish Balay v += n*bs2; 1614faf6580fSSatish Balay x += bs; 1615faf6580fSSatish Balay workt = work; 1616faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1617faf6580fSSatish Balay zb = z + bs*(*idx++); 1618faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1619faf6580fSSatish Balay workt += bs; 1620faf6580fSSatish Balay } 1621faf6580fSSatish Balay } 1622faf6580fSSatish Balay } 1623faf6580fSSatish Balay } 1624faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1625faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1626faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 16272593348eSBarry Smith return 0; 16282593348eSBarry Smith } 16292593348eSBarry Smith 16305615d1e5SSatish Balay #undef __FUNC__ 16315eea60f9SBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 16328f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 16332593348eSBarry Smith { 1634b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16354e220ebcSLois Curfman McInnes 16364e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 16374e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 16384e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 16394e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 16404e220ebcSLois Curfman McInnes info->block_size = a->bs2; 16414e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 16424e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 16434e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 16444e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 16454e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 16464e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 16474e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 16484e220ebcSLois Curfman McInnes info->memory = A->mem; 16494e220ebcSLois Curfman McInnes if (A->factor) { 16504e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 16514e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 16524e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 16534e220ebcSLois Curfman McInnes } else { 16544e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 16554e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16564e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16574e220ebcSLois Curfman McInnes } 16582593348eSBarry Smith return 0; 16592593348eSBarry Smith } 16602593348eSBarry Smith 16615615d1e5SSatish Balay #undef __FUNC__ 16625615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 16638f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 166491d316f6SSatish Balay { 166591d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 166691d316f6SSatish Balay 1667e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 166891d316f6SSatish Balay 166991d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 167091d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1671a7c10996SSatish Balay (a->nz != b->nz)) { 167291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 167391d316f6SSatish Balay } 167491d316f6SSatish Balay 167591d316f6SSatish Balay /* if the a->i are the same */ 167691d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 167791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 167891d316f6SSatish Balay } 167991d316f6SSatish Balay 168091d316f6SSatish Balay /* if a->j are the same */ 168191d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 168291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 168391d316f6SSatish Balay } 168491d316f6SSatish Balay 168591d316f6SSatish Balay /* if a->a are the same */ 168691d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 168791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 168891d316f6SSatish Balay } 168991d316f6SSatish Balay *flg = PETSC_TRUE; 169091d316f6SSatish Balay return 0; 169191d316f6SSatish Balay 169291d316f6SSatish Balay } 169391d316f6SSatish Balay 16945615d1e5SSatish Balay #undef __FUNC__ 16955615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 16968f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 169791d316f6SSatish Balay { 169891d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16997e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 170017e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 170117e48fc4SSatish Balay 17020513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 170317e48fc4SSatish Balay bs = a->bs; 170417e48fc4SSatish Balay aa = a->a; 170517e48fc4SSatish Balay ai = a->i; 170617e48fc4SSatish Balay aj = a->j; 170717e48fc4SSatish Balay ambs = a->mbs; 17089df24120SSatish Balay bs2 = a->bs2; 170991d316f6SSatish Balay 171091d316f6SSatish Balay VecSet(&zero,v); 171190f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1712e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 171317e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 171417e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 171517e48fc4SSatish Balay if (aj[j] == i) { 171617e48fc4SSatish Balay row = i*bs; 17177e67e3f9SSatish Balay aa_j = aa+j*bs2; 17187e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 171991d316f6SSatish Balay break; 172091d316f6SSatish Balay } 172191d316f6SSatish Balay } 172291d316f6SSatish Balay } 172391d316f6SSatish Balay return 0; 172491d316f6SSatish Balay } 172591d316f6SSatish Balay 17265615d1e5SSatish Balay #undef __FUNC__ 17275615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 17288f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 17299867e207SSatish Balay { 17309867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17319867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 17327e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 17339867e207SSatish Balay 17349867e207SSatish Balay ai = a->i; 17359867e207SSatish Balay aj = a->j; 17369867e207SSatish Balay aa = a->a; 17379867e207SSatish Balay m = a->m; 17389867e207SSatish Balay n = a->n; 17399867e207SSatish Balay bs = a->bs; 17409867e207SSatish Balay mbs = a->mbs; 17419df24120SSatish Balay bs2 = a->bs2; 17429867e207SSatish Balay if (ll) { 174390f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1744e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 17459867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17469867e207SSatish Balay M = ai[i+1] - ai[i]; 17479867e207SSatish Balay li = l + i*bs; 17487e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17499867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17507e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 17519867e207SSatish Balay (*v++) *= li[k%bs]; 17529867e207SSatish Balay } 17539867e207SSatish Balay } 17549867e207SSatish Balay } 17559867e207SSatish Balay } 17569867e207SSatish Balay 17579867e207SSatish Balay if (rr) { 175890f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1759e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 17609867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17619867e207SSatish Balay M = ai[i+1] - ai[i]; 17627e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17639867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17649867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 17659867e207SSatish Balay for ( k=0; k<bs; k++ ) { 17669867e207SSatish Balay x = ri[k]; 17679867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 17689867e207SSatish Balay } 17699867e207SSatish Balay } 17709867e207SSatish Balay } 17719867e207SSatish Balay } 17729867e207SSatish Balay return 0; 17739867e207SSatish Balay } 17749867e207SSatish Balay 17759867e207SSatish Balay 1776b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1777b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 17782a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1779736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1780736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 17811a6a6d98SBarry Smith 17821a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 17831a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 17841a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 17851a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 17861a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 17871a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 178848e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 17891a6a6d98SBarry Smith 17907fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 17917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 17927fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 17937fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 17947fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 17957fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 17962593348eSBarry Smith 17975615d1e5SSatish Balay #undef __FUNC__ 17985615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 17998f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 18002593348eSBarry Smith { 1801b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18022593348eSBarry Smith Scalar *v = a->a; 18032593348eSBarry Smith double sum = 0.0; 18049df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 18052593348eSBarry Smith 18062593348eSBarry Smith if (type == NORM_FROBENIUS) { 18070419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 18082593348eSBarry Smith #if defined(PETSC_COMPLEX) 18092593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 18102593348eSBarry Smith #else 18112593348eSBarry Smith sum += (*v)*(*v); v++; 18122593348eSBarry Smith #endif 18132593348eSBarry Smith } 18142593348eSBarry Smith *norm = sqrt(sum); 18152593348eSBarry Smith } 18162593348eSBarry Smith else { 1817e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 18182593348eSBarry Smith } 18192593348eSBarry Smith return 0; 18202593348eSBarry Smith } 18212593348eSBarry Smith 18222593348eSBarry Smith /* 18232593348eSBarry Smith note: This can only work for identity for row and col. It would 18242593348eSBarry Smith be good to check this and otherwise generate an error. 18252593348eSBarry Smith */ 18265615d1e5SSatish Balay #undef __FUNC__ 18275615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 18288f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 18292593348eSBarry Smith { 1830b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18312593348eSBarry Smith Mat outA; 1832de6a44a3SBarry Smith int ierr; 18332593348eSBarry Smith 1834e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 18352593348eSBarry Smith 18362593348eSBarry Smith outA = inA; 18372593348eSBarry Smith inA->factor = FACTOR_LU; 18382593348eSBarry Smith a->row = row; 18392593348eSBarry Smith a->col = col; 18402593348eSBarry Smith 1841*eed86810SBarry Smith if (!a->solve_work) { 1842de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1843*eed86810SBarry Smith PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1844*eed86810SBarry Smith } 18452593348eSBarry Smith 18462593348eSBarry Smith if (!a->diag) { 1847b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 18482593348eSBarry Smith } 18497fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 18502593348eSBarry Smith return 0; 18512593348eSBarry Smith } 18522593348eSBarry Smith 18535615d1e5SSatish Balay #undef __FUNC__ 18545615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 18558f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 18562593348eSBarry Smith { 1857b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18589df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1859b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1860b6490206SBarry Smith PLogFlops(totalnz); 18612593348eSBarry Smith return 0; 18622593348eSBarry Smith } 18632593348eSBarry Smith 18645615d1e5SSatish Balay #undef __FUNC__ 18655615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 18668f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 18677e67e3f9SSatish Balay { 18687e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18697e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1870a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 18719df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 18727e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 18737e67e3f9SSatish Balay 18747e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 18757e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1876e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1877e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1878a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 18797e67e3f9SSatish Balay nrow = ailen[brow]; 18807e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1881e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1882e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1883a7c10996SSatish Balay col = in[l] ; 18847e67e3f9SSatish Balay bcol = col/bs; 18857e67e3f9SSatish Balay cidx = col%bs; 18867e67e3f9SSatish Balay ridx = row%bs; 18877e67e3f9SSatish Balay high = nrow; 18887e67e3f9SSatish Balay low = 0; /* assume unsorted */ 18897e67e3f9SSatish Balay while (high-low > 5) { 18907e67e3f9SSatish Balay t = (low+high)/2; 18917e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 18927e67e3f9SSatish Balay else low = t; 18937e67e3f9SSatish Balay } 18947e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 18957e67e3f9SSatish Balay if (rp[i] > bcol) break; 18967e67e3f9SSatish Balay if (rp[i] == bcol) { 18977e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 18987e67e3f9SSatish Balay goto finished; 18997e67e3f9SSatish Balay } 19007e67e3f9SSatish Balay } 19017e67e3f9SSatish Balay *v++ = zero; 19027e67e3f9SSatish Balay finished:; 19037e67e3f9SSatish Balay } 19047e67e3f9SSatish Balay } 19057e67e3f9SSatish Balay return 0; 19067e67e3f9SSatish Balay } 19077e67e3f9SSatish Balay 19085615d1e5SSatish Balay #undef __FUNC__ 19095eea60f9SBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 19108f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 19115a838052SSatish Balay { 19125a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 19135a838052SSatish Balay *bs = baij->bs; 19145a838052SSatish Balay return 0; 19155a838052SSatish Balay } 19165a838052SSatish Balay 1917d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 19185615d1e5SSatish Balay #undef __FUNC__ 19195615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1920d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1921d9b7c43dSSatish Balay { 1922d9b7c43dSSatish Balay int i,row; 1923d9b7c43dSSatish Balay row = idx[0]; 1924d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1925d9b7c43dSSatish Balay 1926d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1927d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1928d9b7c43dSSatish Balay } 1929d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1930d9b7c43dSSatish Balay return 0; 1931d9b7c43dSSatish Balay } 1932d9b7c43dSSatish Balay 19335615d1e5SSatish Balay #undef __FUNC__ 19345615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 19358f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1936d9b7c43dSSatish Balay { 1937d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1938d9b7c43dSSatish Balay IS is_local; 1939d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1940d9b7c43dSSatish Balay PetscTruth flg; 1941d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1942d9b7c43dSSatish Balay 1943d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1944d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1945d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1946537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1947d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1948d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1949d9b7c43dSSatish Balay 1950d9b7c43dSSatish Balay i = 0; 1951d9b7c43dSSatish Balay while (i < is_n) { 1952e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1953d9b7c43dSSatish Balay flg = PETSC_FALSE; 1954d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1955d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1956d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1957d9b7c43dSSatish Balay i += bs; 1958d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1959d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1960d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1961d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1962d9b7c43dSSatish Balay aa[0] = zero; 1963d9b7c43dSSatish Balay aa+=bs; 1964d9b7c43dSSatish Balay } 1965d9b7c43dSSatish Balay i++; 1966d9b7c43dSSatish Balay } 1967d9b7c43dSSatish Balay } 1968d9b7c43dSSatish Balay if (diag) { 1969d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1970d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1971d9b7c43dSSatish Balay } 1972d9b7c43dSSatish Balay } 1973d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1974d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1975d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 19769a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1977d9b7c43dSSatish Balay 1978d9b7c43dSSatish Balay return 0; 1979d9b7c43dSSatish Balay } 19801c351548SSatish Balay 19815615d1e5SSatish Balay #undef __FUNC__ 19825eea60f9SBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1983ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1984ba4ca20aSSatish Balay { 1985ba4ca20aSSatish Balay static int called = 0; 1986ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1987ba4ca20aSSatish Balay 1988ba4ca20aSSatish Balay if (called) return 0; else called = 1; 1989ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1990ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 1991ba4ca20aSSatish Balay return 0; 1992ba4ca20aSSatish Balay } 1993d9b7c43dSSatish Balay 19942593348eSBarry Smith /* -------------------------------------------------------------------*/ 1995cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 19969867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1997f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1998faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 19991a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 2000b6490206SBarry Smith 0,0, 2001de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 2002b6490206SBarry Smith 0, 2003f2501298SSatish Balay MatTranspose_SeqBAIJ, 200417e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 20059867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2006584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 2007b6490206SBarry Smith 0, 2008d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 20097fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2010b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2011de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 2012d402145bSBarry Smith 0,0, 2013b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 2014b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 2015af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 20167e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 2017ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 20183b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 20193b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 202092c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 202192c4ed94SBarry Smith 0,0,0,0,0,0, 202292c4ed94SBarry Smith MatSetValuesBlocked_SeqBAIJ}; 20232593348eSBarry Smith 20245615d1e5SSatish Balay #undef __FUNC__ 20255615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 20262593348eSBarry Smith /*@C 202744cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 202844cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 202944cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 20302bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 20312bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 20322593348eSBarry Smith 20332593348eSBarry Smith Input Parameters: 2034029af93fSBarry Smith . comm - MPI communicator, set to PETSC_COMM_SELF 2035b6490206SBarry Smith . bs - size of block 20362593348eSBarry Smith . m - number of rows 20372593348eSBarry Smith . n - number of columns 2038b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 20392bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 20402bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 20412593348eSBarry Smith 20422593348eSBarry Smith Output Parameter: 20432593348eSBarry Smith . A - the matrix 20442593348eSBarry Smith 20450513a670SBarry Smith Options Database Keys: 20460513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 20470513a670SBarry Smith $ block calculations (much solver) 20480513a670SBarry Smith $ -mat_block_size - size of the blocks to use 20490513a670SBarry Smith 20502593348eSBarry Smith Notes: 205144cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 20522593348eSBarry Smith storage. That is, the stored row and column indices can begin at 205344cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 20542593348eSBarry Smith 20552593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 20562593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 20572593348eSBarry Smith allocation. For additional details, see the users manual chapter on 20586da5968aSLois Curfman McInnes matrices. 20592593348eSBarry Smith 206044cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 20612593348eSBarry Smith @*/ 2062b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 20632593348eSBarry Smith { 20642593348eSBarry Smith Mat B; 2065b6490206SBarry Smith Mat_SeqBAIJ *b; 20663b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 20673b2fbd54SBarry Smith 20683b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 2069e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2070b6490206SBarry Smith 20710513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 20720513a670SBarry Smith 2073f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 2074e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 20752593348eSBarry Smith 20762593348eSBarry Smith *A = 0; 2077f09e8eb9SSatish Balay PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 20782593348eSBarry Smith PLogObjectCreate(B); 2079b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 208044cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 20812593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 20821a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 20831a6a6d98SBarry Smith if (!flg) { 20847fc0212eSBarry Smith switch (bs) { 20857fc0212eSBarry Smith case 1: 20867fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 20871a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 208839b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 2089f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 20907fc0212eSBarry Smith break; 20914eeb42bcSBarry Smith case 2: 20924eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 20931a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 209439b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 2095f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 20966d84be18SBarry Smith break; 2097f327dd97SBarry Smith case 3: 2098f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 20991a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 210039b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 2101f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 21024eeb42bcSBarry Smith break; 21036d84be18SBarry Smith case 4: 21046d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 21051a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 210639b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 2107f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 21086d84be18SBarry Smith break; 21096d84be18SBarry Smith case 5: 21106d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 21111a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 211239b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 2113f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 21146d84be18SBarry Smith break; 211548e9ddb2SSatish Balay case 7: 211648e9ddb2SSatish Balay B->ops.mult = MatMult_SeqBAIJ_7; 211748e9ddb2SSatish Balay B->ops.solve = MatSolve_SeqBAIJ_7; 211848e9ddb2SSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_7; 211948e9ddb2SSatish Balay break; 21207fc0212eSBarry Smith } 21211a6a6d98SBarry Smith } 2122b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 2123b6490206SBarry Smith B->view = MatView_SeqBAIJ; 21242593348eSBarry Smith B->factor = 0; 21252593348eSBarry Smith B->lupivotthreshold = 1.0; 212690f02eecSBarry Smith B->mapping = 0; 21272593348eSBarry Smith b->row = 0; 21282593348eSBarry Smith b->col = 0; 21292593348eSBarry Smith b->reallocs = 0; 21302593348eSBarry Smith 213144cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 213244cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 2133b6490206SBarry Smith b->mbs = mbs; 2134f2501298SSatish Balay b->nbs = nbs; 2135b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 21362593348eSBarry Smith if (nnz == PETSC_NULL) { 2137119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 21382593348eSBarry Smith else if (nz <= 0) nz = 1; 2139b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2140b6490206SBarry Smith nz = nz*mbs; 21412593348eSBarry Smith } 21422593348eSBarry Smith else { 21432593348eSBarry Smith nz = 0; 2144b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 21452593348eSBarry Smith } 21462593348eSBarry Smith 21472593348eSBarry Smith /* allocate the matrix space */ 21487e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 21492593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 21507e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 21517e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 21522593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 21532593348eSBarry Smith b->i = b->j + nz; 21542593348eSBarry Smith b->singlemalloc = 1; 21552593348eSBarry Smith 2156de6a44a3SBarry Smith b->i[0] = 0; 2157b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 21582593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 21592593348eSBarry Smith } 21602593348eSBarry Smith 2161b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2162b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2163f09e8eb9SSatish Balay PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2164b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 21652593348eSBarry Smith 2166b6490206SBarry Smith b->bs = bs; 21679df24120SSatish Balay b->bs2 = bs2; 2168b6490206SBarry Smith b->mbs = mbs; 21692593348eSBarry Smith b->nz = 0; 217018e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 21712593348eSBarry Smith b->sorted = 0; 21722593348eSBarry Smith b->roworiented = 1; 21732593348eSBarry Smith b->nonew = 0; 21742593348eSBarry Smith b->diag = 0; 21752593348eSBarry Smith b->solve_work = 0; 2176de6a44a3SBarry Smith b->mult_work = 0; 21772593348eSBarry Smith b->spptr = 0; 21784e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 21794e220ebcSLois Curfman McInnes 21802593348eSBarry Smith *A = B; 21812593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 21822593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 21832593348eSBarry Smith return 0; 21842593348eSBarry Smith } 21852593348eSBarry Smith 21865615d1e5SSatish Balay #undef __FUNC__ 21875615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2188b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 21892593348eSBarry Smith { 21902593348eSBarry Smith Mat C; 2191b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 21929df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2193de6a44a3SBarry Smith 2194e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 21952593348eSBarry Smith 21962593348eSBarry Smith *B = 0; 2197f09e8eb9SSatish Balay PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 21982593348eSBarry Smith PLogObjectCreate(C); 2199b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 22002593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2201b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 2202b6490206SBarry Smith C->view = MatView_SeqBAIJ; 22032593348eSBarry Smith C->factor = A->factor; 22042593348eSBarry Smith c->row = 0; 22052593348eSBarry Smith c->col = 0; 22062593348eSBarry Smith C->assembled = PETSC_TRUE; 22072593348eSBarry Smith 220844cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 220944cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 221044cd7ae7SLois Curfman McInnes C->M = a->m; 221144cd7ae7SLois Curfman McInnes C->N = a->n; 221244cd7ae7SLois Curfman McInnes 2213b6490206SBarry Smith c->bs = a->bs; 22149df24120SSatish Balay c->bs2 = a->bs2; 2215b6490206SBarry Smith c->mbs = a->mbs; 2216b6490206SBarry Smith c->nbs = a->nbs; 22172593348eSBarry Smith 2218b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2219b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2220b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22212593348eSBarry Smith c->imax[i] = a->imax[i]; 22222593348eSBarry Smith c->ilen[i] = a->ilen[i]; 22232593348eSBarry Smith } 22242593348eSBarry Smith 22252593348eSBarry Smith /* allocate the matrix space */ 22262593348eSBarry Smith c->singlemalloc = 1; 22277e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 22282593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 22297e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 2230de6a44a3SBarry Smith c->i = c->j + nz; 2231b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2232b6490206SBarry Smith if (mbs > 0) { 2233de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 22342593348eSBarry Smith if (cpvalues == COPY_VALUES) { 22357e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 22362593348eSBarry Smith } 22372593348eSBarry Smith } 22382593348eSBarry Smith 2239f09e8eb9SSatish Balay PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 22402593348eSBarry Smith c->sorted = a->sorted; 22412593348eSBarry Smith c->roworiented = a->roworiented; 22422593348eSBarry Smith c->nonew = a->nonew; 22432593348eSBarry Smith 22442593348eSBarry Smith if (a->diag) { 2245b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2246b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2247b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22482593348eSBarry Smith c->diag[i] = a->diag[i]; 22492593348eSBarry Smith } 22502593348eSBarry Smith } 22512593348eSBarry Smith else c->diag = 0; 22522593348eSBarry Smith c->nz = a->nz; 22532593348eSBarry Smith c->maxnz = a->maxnz; 22542593348eSBarry Smith c->solve_work = 0; 22552593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 22567fc0212eSBarry Smith c->mult_work = 0; 22572593348eSBarry Smith *B = C; 22582593348eSBarry Smith return 0; 22592593348eSBarry Smith } 22602593348eSBarry Smith 22615615d1e5SSatish Balay #undef __FUNC__ 22625615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 226319bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 22642593348eSBarry Smith { 2265b6490206SBarry Smith Mat_SeqBAIJ *a; 22662593348eSBarry Smith Mat B; 2267de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2268b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 226935aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2270a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2271b6490206SBarry Smith Scalar *aa; 227219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 22732593348eSBarry Smith 22741a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2275de6a44a3SBarry Smith bs2 = bs*bs; 2276b6490206SBarry Smith 22772593348eSBarry Smith MPI_Comm_size(comm,&size); 2278e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 227990ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 228077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2281e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 22822593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 22832593348eSBarry Smith 2284e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 228535aab85fSBarry Smith 228635aab85fSBarry Smith /* 228735aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 228835aab85fSBarry Smith divisible by the blocksize 228935aab85fSBarry Smith */ 2290b6490206SBarry Smith mbs = M/bs; 229135aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 229235aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 229335aab85fSBarry Smith else mbs++; 229435aab85fSBarry Smith if (extra_rows) { 2295537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 229635aab85fSBarry Smith } 2297b6490206SBarry Smith 22982593348eSBarry Smith /* read in row lengths */ 229935aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 230077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 230135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 23022593348eSBarry Smith 2303b6490206SBarry Smith /* read in column indices */ 230435aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 230577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 230635aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2307b6490206SBarry Smith 2308b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2309b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2310b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 231135aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 231235aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 231335aab85fSBarry Smith masked = mask + mbs; 2314b6490206SBarry Smith rowcount = 0; nzcount = 0; 2315b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 231635aab85fSBarry Smith nmask = 0; 2317b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2318b6490206SBarry Smith kmax = rowlengths[rowcount]; 2319b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 232035aab85fSBarry Smith tmp = jj[nzcount++]/bs; 232135aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2322b6490206SBarry Smith } 2323b6490206SBarry Smith rowcount++; 2324b6490206SBarry Smith } 232535aab85fSBarry Smith browlengths[i] += nmask; 232635aab85fSBarry Smith /* zero out the mask elements we set */ 232735aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2328b6490206SBarry Smith } 2329b6490206SBarry Smith 23302593348eSBarry Smith /* create our matrix */ 233135aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 233235aab85fSBarry Smith CHKERRQ(ierr); 23332593348eSBarry Smith B = *A; 2334b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 23352593348eSBarry Smith 23362593348eSBarry Smith /* set matrix "i" values */ 2337de6a44a3SBarry Smith a->i[0] = 0; 2338b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2339b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2340b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 23412593348eSBarry Smith } 2342b6490206SBarry Smith a->nz = 0; 2343b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 23442593348eSBarry Smith 2345b6490206SBarry Smith /* read in nonzero values */ 234635aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 234777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 234835aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2349b6490206SBarry Smith 2350b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2351b6490206SBarry Smith nzcount = 0; jcount = 0; 2352b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2353b6490206SBarry Smith nzcountb = nzcount; 235435aab85fSBarry Smith nmask = 0; 2355b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2356b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2357b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 235835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 235935aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2360b6490206SBarry Smith } 2361b6490206SBarry Smith rowcount++; 2362b6490206SBarry Smith } 2363de6a44a3SBarry Smith /* sort the masked values */ 236477c4ece6SBarry Smith PetscSortInt(nmask,masked); 2365de6a44a3SBarry Smith 2366b6490206SBarry Smith /* set "j" values into matrix */ 2367b6490206SBarry Smith maskcount = 1; 236835aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 236935aab85fSBarry Smith a->j[jcount++] = masked[j]; 2370de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2371b6490206SBarry Smith } 2372b6490206SBarry Smith /* set "a" values into matrix */ 2373de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2374b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2375b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2376b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2377de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2378de6a44a3SBarry Smith block = mask[tmp] - 1; 2379de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2380de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2381b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2382b6490206SBarry Smith } 2383b6490206SBarry Smith } 238435aab85fSBarry Smith /* zero out the mask elements we set */ 238535aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2386b6490206SBarry Smith } 2387e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2388b6490206SBarry Smith 2389b6490206SBarry Smith PetscFree(rowlengths); 2390b6490206SBarry Smith PetscFree(browlengths); 2391b6490206SBarry Smith PetscFree(aa); 2392b6490206SBarry Smith PetscFree(jj); 2393b6490206SBarry Smith PetscFree(mask); 2394b6490206SBarry Smith 2395b6490206SBarry Smith B->assembled = PETSC_TRUE; 2396de6a44a3SBarry Smith 2397de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2398de6a44a3SBarry Smith if (flg) { 239919bcc07fSBarry Smith Viewer tviewer; 240019bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2401639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 240219bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 240319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2404de6a44a3SBarry Smith } 2405de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2406de6a44a3SBarry Smith if (flg) { 240719bcc07fSBarry Smith Viewer tviewer; 240819bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2409639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 241019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 241119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2412de6a44a3SBarry Smith } 2413de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2414de6a44a3SBarry Smith if (flg) { 241519bcc07fSBarry Smith Viewer tviewer; 241619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 241719bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 241819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2419de6a44a3SBarry Smith } 2420de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2421de6a44a3SBarry Smith if (flg) { 242219bcc07fSBarry Smith Viewer tviewer; 242319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2424639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 242519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 242619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2427de6a44a3SBarry Smith } 2428de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2429de6a44a3SBarry Smith if (flg) { 243019bcc07fSBarry Smith Viewer tviewer; 243119bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 243219bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 243319bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 243419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2435de6a44a3SBarry Smith } 24362593348eSBarry Smith return 0; 24372593348eSBarry Smith } 24382593348eSBarry Smith 24392593348eSBarry Smith 24402593348eSBarry Smith 2441