19c01be13SBarry Smith 2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 3*3a40ed3dSBarry Smith static char vcid[] = "$Id: baij.c,v 1.113 1997/10/01 22:39:40 bsmith Exp bsmith $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 103369ce9aSBarry Smith 113369ce9aSBarry Smith #include "pinclude/pviewer.h" 123369ce9aSBarry Smith #include "sys.h" 1370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 141a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 151a6a6d98SBarry Smith #include "src/inline/spops.h" 162593348eSBarry Smith #include "petsc.h" 173b547af2SSatish Balay 182593348eSBarry Smith 19de6a44a3SBarry Smith /* 20de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 21de6a44a3SBarry Smith */ 22de6a44a3SBarry Smith 235615d1e5SSatish Balay #undef __FUNC__ 245615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 25de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 26de6a44a3SBarry Smith { 27de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 287fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 29de6a44a3SBarry Smith 30*3a40ed3dSBarry Smith PetscFunctionBegin; 31de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 32de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 337fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 34de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 35de6a44a3SBarry Smith if (a->j[j] == i) { 36de6a44a3SBarry Smith diag[i] = j; 37de6a44a3SBarry Smith break; 38de6a44a3SBarry Smith } 39de6a44a3SBarry Smith } 40de6a44a3SBarry Smith } 41de6a44a3SBarry Smith a->diag = diag; 42*3a40ed3dSBarry Smith PetscFunctionReturn(0); 43de6a44a3SBarry Smith } 442593348eSBarry Smith 452593348eSBarry Smith 463b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 473b2fbd54SBarry Smith 485615d1e5SSatish Balay #undef __FUNC__ 495615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 503b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 513b2fbd54SBarry Smith PetscTruth *done) 523b2fbd54SBarry Smith { 533b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 543b2fbd54SBarry Smith int ierr, n = a->mbs,i; 553b2fbd54SBarry Smith 56*3a40ed3dSBarry Smith PetscFunctionBegin; 573b2fbd54SBarry Smith *nn = n; 58*3a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 593b2fbd54SBarry Smith if (symmetric) { 603b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 613b2fbd54SBarry Smith } else if (oshift == 1) { 623b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 633b2fbd54SBarry Smith int nz = a->i[n] + 1; 643b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 653b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 663b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 673b2fbd54SBarry Smith } else { 683b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 693b2fbd54SBarry Smith } 703b2fbd54SBarry Smith 71*3a40ed3dSBarry Smith PetscFunctionReturn(0); 723b2fbd54SBarry Smith } 733b2fbd54SBarry Smith 745615d1e5SSatish Balay #undef __FUNC__ 75d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 763b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 773b2fbd54SBarry Smith PetscTruth *done) 783b2fbd54SBarry Smith { 793b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 803b2fbd54SBarry Smith int i,n = a->mbs; 813b2fbd54SBarry Smith 82*3a40ed3dSBarry Smith PetscFunctionBegin; 83*3a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 843b2fbd54SBarry Smith if (symmetric) { 853b2fbd54SBarry Smith PetscFree(*ia); 863b2fbd54SBarry Smith PetscFree(*ja); 87af5da2bfSBarry Smith } else if (oshift == 1) { 883b2fbd54SBarry Smith int nz = a->i[n]; 893b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 903b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 913b2fbd54SBarry Smith } 92*3a40ed3dSBarry Smith PetscFunctionReturn(0); 933b2fbd54SBarry Smith } 943b2fbd54SBarry Smith 953b2fbd54SBarry Smith 965615d1e5SSatish Balay #undef __FUNC__ 97d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" 98b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 992593348eSBarry Smith { 100b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1019df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102b6490206SBarry Smith Scalar *aa; 1032593348eSBarry Smith 104*3a40ed3dSBarry Smith PetscFunctionBegin; 10590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1062593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1072593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1083b2fbd54SBarry Smith 1092593348eSBarry Smith col_lens[1] = a->m; 1102593348eSBarry Smith col_lens[2] = a->n; 1117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1122593348eSBarry Smith 1132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 114b6490206SBarry Smith count = 0; 115b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 116b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 117b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118b6490206SBarry Smith } 1192593348eSBarry Smith } 1200752156aSBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 1212593348eSBarry Smith PetscFree(col_lens); 1222593348eSBarry Smith 1232593348eSBarry Smith /* store column indices (zero start index) */ 12466963ce1SSatish Balay jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 125b6490206SBarry Smith count = 0; 126b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 127b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 128b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 130b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1312593348eSBarry Smith } 1322593348eSBarry Smith } 133b6490206SBarry Smith } 134b6490206SBarry Smith } 1350752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 136b6490206SBarry Smith PetscFree(jj); 1372593348eSBarry Smith 1382593348eSBarry Smith /* store nonzero values */ 13966963ce1SSatish Balay aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140b6490206SBarry Smith count = 0; 141b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 142b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 143b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 146b6490206SBarry Smith } 147b6490206SBarry Smith } 148b6490206SBarry Smith } 149b6490206SBarry Smith } 1500752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 151b6490206SBarry Smith PetscFree(aa); 152*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1532593348eSBarry Smith } 1542593348eSBarry Smith 1555615d1e5SSatish Balay #undef __FUNC__ 156d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" 157b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1582593348eSBarry Smith { 159b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1609df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1612593348eSBarry Smith FILE *fd; 1622593348eSBarry Smith char *outputname; 1632593348eSBarry Smith 164*3a40ed3dSBarry Smith PetscFunctionBegin; 16590ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1662593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16790ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 168639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1697ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1702593348eSBarry Smith } 171639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 172e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1732593348eSBarry Smith } 174639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 17544cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17644cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17744cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17844cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17944cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 180*3a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 181766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 18344cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 184766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 185766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 186766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 18744cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18844cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18944cd7ae7SLois Curfman McInnes #else 19044cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 19144cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 19244cd7ae7SLois Curfman McInnes #endif 19344cd7ae7SLois Curfman McInnes } 19444cd7ae7SLois Curfman McInnes } 19544cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19644cd7ae7SLois Curfman McInnes } 19744cd7ae7SLois Curfman McInnes } 19844cd7ae7SLois Curfman McInnes } 1992593348eSBarry Smith else { 200b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 201b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 202b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 203b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 204b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 205*3a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 206766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 20788685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2087e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20988685aaeSLois Curfman McInnes } 210766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 211766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 212766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 213766eeae4SLois Curfman McInnes } 21488685aaeSLois Curfman McInnes else { 2157e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 21688685aaeSLois Curfman McInnes } 21788685aaeSLois Curfman McInnes #else 2187e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 21988685aaeSLois Curfman McInnes #endif 2202593348eSBarry Smith } 2212593348eSBarry Smith } 2222593348eSBarry Smith fprintf(fd,"\n"); 2232593348eSBarry Smith } 2242593348eSBarry Smith } 225b6490206SBarry Smith } 2262593348eSBarry Smith fflush(fd); 227*3a40ed3dSBarry Smith PetscFunctionReturn(0); 2282593348eSBarry Smith } 2292593348eSBarry Smith 2305615d1e5SSatish Balay #undef __FUNC__ 231d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" 2323270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2333270192aSSatish Balay { 2343270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2353270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2363270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2373270192aSSatish Balay Scalar *aa; 2383270192aSSatish Balay Draw draw; 2393270192aSSatish Balay DrawButton button; 2403270192aSSatish Balay PetscTruth isnull; 2413270192aSSatish Balay 242*3a40ed3dSBarry Smith PetscFunctionBegin; 243*3a40ed3dSBarry Smith ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr); 244*3a40ed3dSBarry Smith ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 2453270192aSSatish Balay 2463270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2473270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2483270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2493270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2503270192aSSatish Balay color = DRAW_BLUE; 2513270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2523270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2533270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2543270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2553270192aSSatish Balay aa = a->a + j*bs2; 2563270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2573270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2583270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 259b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2603270192aSSatish Balay } 2613270192aSSatish Balay } 2623270192aSSatish Balay } 2633270192aSSatish Balay } 2643270192aSSatish Balay color = DRAW_CYAN; 2653270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2663270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2673270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2683270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2693270192aSSatish Balay aa = a->a + j*bs2; 2703270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2713270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2723270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 273b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2743270192aSSatish Balay } 2753270192aSSatish Balay } 2763270192aSSatish Balay } 2773270192aSSatish Balay } 2783270192aSSatish Balay 2793270192aSSatish Balay color = DRAW_RED; 2803270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2813270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2823270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2833270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2843270192aSSatish Balay aa = a->a + j*bs2; 2853270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2863270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2873270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 288b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2893270192aSSatish Balay } 2903270192aSSatish Balay } 2913270192aSSatish Balay } 2923270192aSSatish Balay } 2933270192aSSatish Balay 2943270192aSSatish Balay DrawFlush(draw); 2953270192aSSatish Balay DrawGetPause(draw,&pause); 296*3a40ed3dSBarry Smith if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);} 2973270192aSSatish Balay 2983270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2993270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3003270192aSSatish Balay while (button != BUTTON_RIGHT) { 3013270192aSSatish Balay DrawClear(draw); 3023270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 3033270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 3043270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 3053270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 3063270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 3073270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 3083270192aSSatish Balay w *= scale; h *= scale; 3093270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 3103270192aSSatish Balay color = DRAW_BLUE; 3113270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3123270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3133270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3143270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3153270192aSSatish Balay aa = a->a + j*bs2; 3163270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3173270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3183270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 319b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3203270192aSSatish Balay } 3213270192aSSatish Balay } 3223270192aSSatish Balay } 3233270192aSSatish Balay } 3243270192aSSatish Balay color = DRAW_CYAN; 3253270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3263270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3273270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3283270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3293270192aSSatish Balay aa = a->a + j*bs2; 3303270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3313270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3323270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 333b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3343270192aSSatish Balay } 3353270192aSSatish Balay } 3363270192aSSatish Balay } 3373270192aSSatish Balay } 3383270192aSSatish Balay 3393270192aSSatish Balay color = DRAW_RED; 3403270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3413270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3423270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3433270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3443270192aSSatish Balay aa = a->a + j*bs2; 3453270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3463270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3473270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 348b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3493270192aSSatish Balay } 3503270192aSSatish Balay } 3513270192aSSatish Balay } 3523270192aSSatish Balay } 3533270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3543270192aSSatish Balay } 355*3a40ed3dSBarry Smith PetscFunctionReturn(0); 3563270192aSSatish Balay } 3573270192aSSatish Balay 3585615d1e5SSatish Balay #undef __FUNC__ 359d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ" 3608f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3612593348eSBarry Smith { 3622593348eSBarry Smith Mat A = (Mat) obj; 36319bcc07fSBarry Smith ViewerType vtype; 36419bcc07fSBarry Smith int ierr; 3652593348eSBarry Smith 366*3a40ed3dSBarry Smith PetscFunctionBegin; 36719bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36819bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 369e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 370*3a40ed3dSBarry Smith } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 371*3a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 372*3a40ed3dSBarry Smith } else if (vtype == BINARY_FILE_VIEWER) { 373*3a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 374*3a40ed3dSBarry Smith } else if (vtype == DRAW_VIEWER) { 375*3a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 3762593348eSBarry Smith } 377*3a40ed3dSBarry Smith PetscFunctionReturn(0); 3782593348eSBarry Smith } 379b6490206SBarry Smith 380119a934aSSatish Balay #define CHUNKSIZE 10 381cd0e1443SSatish Balay 3825615d1e5SSatish Balay #undef __FUNC__ 3835615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 384639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 385cd0e1443SSatish Balay { 386cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 387cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 388cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 389a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3909df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 391cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 392cd0e1443SSatish Balay 393*3a40ed3dSBarry Smith PetscFunctionBegin; 394cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 395cd0e1443SSatish Balay row = im[k]; brow = row/bs; 396*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 397e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 398e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3993b2fbd54SBarry Smith #endif 4002c3acbe9SBarry Smith rp = aj + ai[brow]; 4012c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 4022c3acbe9SBarry Smith rmax = imax[brow]; 4032c3acbe9SBarry Smith nrow = ailen[brow]; 404cd0e1443SSatish Balay low = 0; 405cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 406*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 407e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 408e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 4093b2fbd54SBarry Smith #endif 410a7c10996SSatish Balay col = in[l]; bcol = col/bs; 411cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 412cd0e1443SSatish Balay if (roworiented) { 413cd0e1443SSatish Balay value = *v++; 414*3a40ed3dSBarry Smith } else { 415cd0e1443SSatish Balay value = v[k + l*m]; 416cd0e1443SSatish Balay } 417cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4182c3acbe9SBarry Smith while (high-low > 7) { 419cd0e1443SSatish Balay t = (low+high)/2; 420cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 421cd0e1443SSatish Balay else low = t; 422cd0e1443SSatish Balay } 423cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 424cd0e1443SSatish Balay if (rp[i] > bcol) break; 425cd0e1443SSatish Balay if (rp[i] == bcol) { 4267e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 427cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 428cd0e1443SSatish Balay else *bap = value; 429f1241b54SBarry Smith goto noinsert1; 430cd0e1443SSatish Balay } 431cd0e1443SSatish Balay } 432c2653b3dSLois Curfman McInnes if (nonew == 1) goto noinsert1; 43311ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 434cd0e1443SSatish Balay if (nrow >= rmax) { 435cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 436cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 437cd0e1443SSatish Balay Scalar *new_a; 438cd0e1443SSatish Balay 43911ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 44096854ed6SLois Curfman McInnes 44196854ed6SLois Curfman McInnes /* Malloc new storage space */ 4427e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 443cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4447e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 445cd0e1443SSatish Balay new_i = new_j + new_nz; 446cd0e1443SSatish Balay 447cd0e1443SSatish Balay /* copy over old data into new slots */ 448cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4497e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 450a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 451a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 452a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 453cd0e1443SSatish Balay len*sizeof(int)); 454a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 455a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 456a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 457a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 458cd0e1443SSatish Balay /* free up old matrix storage */ 459cd0e1443SSatish Balay PetscFree(a->a); 460cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 461cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 462cd0e1443SSatish Balay a->singlemalloc = 1; 463cd0e1443SSatish Balay 464a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 465cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4667e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 46718e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 468cd0e1443SSatish Balay a->reallocs++; 469119a934aSSatish Balay a->nz++; 470cd0e1443SSatish Balay } 4717e67e3f9SSatish Balay N = nrow++ - 1; 472cd0e1443SSatish Balay /* shift up all the later entries in this row */ 473cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 474cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4757e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 476cd0e1443SSatish Balay } 4777e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 478cd0e1443SSatish Balay rp[i] = bcol; 4797e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 480f1241b54SBarry Smith noinsert1:; 481cd0e1443SSatish Balay low = i; 482cd0e1443SSatish Balay } 483cd0e1443SSatish Balay ailen[brow] = nrow; 484cd0e1443SSatish Balay } 485*3a40ed3dSBarry Smith PetscFunctionReturn(0); 486cd0e1443SSatish Balay } 487cd0e1443SSatish Balay 4885615d1e5SSatish Balay #undef __FUNC__ 48905a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 49092c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 49192c4ed94SBarry Smith { 49292c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4938a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 494dd9472c6SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 495dd9472c6SBarry Smith int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 4960e324ae4SSatish Balay Scalar *ap,*value=v,*aa=a->a,*bap; 49792c4ed94SBarry Smith 498*3a40ed3dSBarry Smith PetscFunctionBegin; 4990e324ae4SSatish Balay if (roworiented) { 5000e324ae4SSatish Balay stepval = (n-1)*bs; 5010e324ae4SSatish Balay } else { 5020e324ae4SSatish Balay stepval = (m-1)*bs; 5030e324ae4SSatish Balay } 50492c4ed94SBarry Smith for ( k=0; k<m; k++ ) { /* loop over added rows */ 50592c4ed94SBarry Smith row = im[k]; 506*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 50792c4ed94SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 5088a84c255SSatish Balay if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 50992c4ed94SBarry Smith #endif 51092c4ed94SBarry Smith rp = aj + ai[row]; 51192c4ed94SBarry Smith ap = aa + bs2*ai[row]; 51292c4ed94SBarry Smith rmax = imax[row]; 51392c4ed94SBarry Smith nrow = ailen[row]; 51492c4ed94SBarry Smith low = 0; 51592c4ed94SBarry Smith for ( l=0; l<n; l++ ) { /* loop over added columns */ 516*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g) 51792c4ed94SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 5188a84c255SSatish Balay if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 51992c4ed94SBarry Smith #endif 52092c4ed94SBarry Smith col = in[l]; 52192c4ed94SBarry Smith if (roworiented) { 5220e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 5230e324ae4SSatish Balay } else { 5240e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 52592c4ed94SBarry Smith } 52692c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 52792c4ed94SBarry Smith while (high-low > 7) { 52892c4ed94SBarry Smith t = (low+high)/2; 52992c4ed94SBarry Smith if (rp[t] > col) high = t; 53092c4ed94SBarry Smith else low = t; 53192c4ed94SBarry Smith } 53292c4ed94SBarry Smith for ( i=low; i<high; i++ ) { 53392c4ed94SBarry Smith if (rp[i] > col) break; 53492c4ed94SBarry Smith if (rp[i] == col) { 5358a84c255SSatish Balay bap = ap + bs2*i; 5360e324ae4SSatish Balay if (roworiented) { 5378a84c255SSatish Balay if (is == ADD_VALUES) { 538dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 539dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5408a84c255SSatish Balay bap[jj] += *value++; 541dd9472c6SBarry Smith } 542dd9472c6SBarry Smith } 5430e324ae4SSatish Balay } else { 544dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 545dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5460e324ae4SSatish Balay bap[jj] = *value++; 5478a84c255SSatish Balay } 548dd9472c6SBarry Smith } 549dd9472c6SBarry Smith } 5500e324ae4SSatish Balay } else { 5510e324ae4SSatish Balay if (is == ADD_VALUES) { 552dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 553dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5540e324ae4SSatish Balay *bap++ += *value++; 555dd9472c6SBarry Smith } 556dd9472c6SBarry Smith } 5570e324ae4SSatish Balay } else { 558dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 559dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5600e324ae4SSatish Balay *bap++ = *value++; 5610e324ae4SSatish Balay } 5628a84c255SSatish Balay } 563dd9472c6SBarry Smith } 564dd9472c6SBarry Smith } 565f1241b54SBarry Smith goto noinsert2; 56692c4ed94SBarry Smith } 56792c4ed94SBarry Smith } 56889280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 56911ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 57092c4ed94SBarry Smith if (nrow >= rmax) { 57192c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 57292c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 57392c4ed94SBarry Smith Scalar *new_a; 57492c4ed94SBarry Smith 57511ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 57689280ab3SLois Curfman McInnes 57792c4ed94SBarry Smith /* malloc new storage space */ 57892c4ed94SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 57992c4ed94SBarry Smith new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 58092c4ed94SBarry Smith new_j = (int *) (new_a + bs2*new_nz); 58192c4ed94SBarry Smith new_i = new_j + new_nz; 58292c4ed94SBarry Smith 58392c4ed94SBarry Smith /* copy over old data into new slots */ 58492c4ed94SBarry Smith for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 58592c4ed94SBarry Smith for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 58692c4ed94SBarry Smith PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 58792c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 588dd9472c6SBarry Smith PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 58992c4ed94SBarry Smith PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 59092c4ed94SBarry Smith PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 591dd9472c6SBarry Smith PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 59292c4ed94SBarry Smith /* free up old matrix storage */ 59392c4ed94SBarry Smith PetscFree(a->a); 59492c4ed94SBarry Smith if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 59592c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 59692c4ed94SBarry Smith a->singlemalloc = 1; 59792c4ed94SBarry Smith 59892c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 59992c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 60092c4ed94SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 60192c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 60292c4ed94SBarry Smith a->reallocs++; 60392c4ed94SBarry Smith a->nz++; 60492c4ed94SBarry Smith } 60592c4ed94SBarry Smith N = nrow++ - 1; 60692c4ed94SBarry Smith /* shift up all the later entries in this row */ 60792c4ed94SBarry Smith for ( ii=N; ii>=i; ii-- ) { 60892c4ed94SBarry Smith rp[ii+1] = rp[ii]; 60992c4ed94SBarry Smith PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 61092c4ed94SBarry Smith } 61192c4ed94SBarry Smith if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 61292c4ed94SBarry Smith rp[i] = col; 6138a84c255SSatish Balay bap = ap + bs2*i; 6140e324ae4SSatish Balay if (roworiented) { 615dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval) { 616dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 6170e324ae4SSatish Balay bap[jj] = *value++; 618dd9472c6SBarry Smith } 619dd9472c6SBarry Smith } 6200e324ae4SSatish Balay } else { 621dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 622dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 6230e324ae4SSatish Balay *bap++ = *value++; 6240e324ae4SSatish Balay } 625dd9472c6SBarry Smith } 626dd9472c6SBarry Smith } 627f1241b54SBarry Smith noinsert2:; 62892c4ed94SBarry Smith low = i; 62992c4ed94SBarry Smith } 63092c4ed94SBarry Smith ailen[row] = nrow; 63192c4ed94SBarry Smith } 632*3a40ed3dSBarry Smith PetscFunctionReturn(0); 63392c4ed94SBarry Smith } 63492c4ed94SBarry Smith 63592c4ed94SBarry Smith #undef __FUNC__ 636d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" 6378f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 6380b824a48SSatish Balay { 6390b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 640*3a40ed3dSBarry Smith 641*3a40ed3dSBarry Smith PetscFunctionBegin; 6420752156aSBarry Smith if (m) *m = a->m; 6430752156aSBarry Smith if (n) *n = a->n; 644*3a40ed3dSBarry Smith PetscFunctionReturn(0); 6450b824a48SSatish Balay } 6460b824a48SSatish Balay 6475615d1e5SSatish Balay #undef __FUNC__ 648d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 6498f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 6500b824a48SSatish Balay { 6510b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 652*3a40ed3dSBarry Smith 653*3a40ed3dSBarry Smith PetscFunctionBegin; 6540b824a48SSatish Balay *m = 0; *n = a->m; 655*3a40ed3dSBarry Smith PetscFunctionReturn(0); 6560b824a48SSatish Balay } 6570b824a48SSatish Balay 6585615d1e5SSatish Balay #undef __FUNC__ 6595615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 6609867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6619867e207SSatish Balay { 6629867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6637e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 6649867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 6659867e207SSatish Balay 666*3a40ed3dSBarry Smith PetscFunctionBegin; 6679867e207SSatish Balay bs = a->bs; 6689867e207SSatish Balay ai = a->i; 6699867e207SSatish Balay aj = a->j; 6709867e207SSatish Balay aa = a->a; 6719df24120SSatish Balay bs2 = a->bs2; 6729867e207SSatish Balay 673e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 6749867e207SSatish Balay 6759867e207SSatish Balay bn = row/bs; /* Block number */ 6769867e207SSatish Balay bp = row % bs; /* Block Position */ 6779867e207SSatish Balay M = ai[bn+1] - ai[bn]; 6789867e207SSatish Balay *nz = bs*M; 6799867e207SSatish Balay 6809867e207SSatish Balay if (v) { 6819867e207SSatish Balay *v = 0; 6829867e207SSatish Balay if (*nz) { 6839867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 6849867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6859867e207SSatish Balay v_i = *v + i*bs; 6867e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 6877e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 6889867e207SSatish Balay } 6899867e207SSatish Balay } 6909867e207SSatish Balay } 6919867e207SSatish Balay 6929867e207SSatish Balay if (idx) { 6939867e207SSatish Balay *idx = 0; 6949867e207SSatish Balay if (*nz) { 6959867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 6969867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6979867e207SSatish Balay idx_i = *idx + i*bs; 6989867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 6999867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 7009867e207SSatish Balay } 7019867e207SSatish Balay } 7029867e207SSatish Balay } 703*3a40ed3dSBarry Smith PetscFunctionReturn(0); 7049867e207SSatish Balay } 7059867e207SSatish Balay 7065615d1e5SSatish Balay #undef __FUNC__ 707d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" 7089867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 7099867e207SSatish Balay { 710*3a40ed3dSBarry Smith PetscFunctionBegin; 7119867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 7129867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 713*3a40ed3dSBarry Smith PetscFunctionReturn(0); 7149867e207SSatish Balay } 715b6490206SBarry Smith 7165615d1e5SSatish Balay #undef __FUNC__ 7175615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 7188f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B) 719f2501298SSatish Balay { 720f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 721f2501298SSatish Balay Mat C; 722f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 7239df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 724f2501298SSatish Balay Scalar *array=a->a; 725f2501298SSatish Balay 726*3a40ed3dSBarry Smith PetscFunctionBegin; 727*3a40ed3dSBarry Smith if (B==PETSC_NULL && mbs!=nbs) SETERRQ(1,0,"Square matrix only for in-place"); 728f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 729f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 730a7c10996SSatish Balay 731a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 732f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 733f2501298SSatish Balay PetscFree(col); 734f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 735f2501298SSatish Balay cols = rows + bs; 736f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 737f2501298SSatish Balay cols[0] = i*bs; 738f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 739f2501298SSatish Balay len = ai[i+1] - ai[i]; 740f2501298SSatish Balay for ( j=0; j<len; j++ ) { 741f2501298SSatish Balay rows[0] = (*aj++)*bs; 742f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 743f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 744f2501298SSatish Balay array += bs2; 745f2501298SSatish Balay } 746f2501298SSatish Balay } 7471073c447SSatish Balay PetscFree(rows); 748f2501298SSatish Balay 7496d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7506d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 751f2501298SSatish Balay 752f2501298SSatish Balay if (B != PETSC_NULL) { 753f2501298SSatish Balay *B = C; 754f2501298SSatish Balay } else { 755f2501298SSatish Balay /* This isn't really an in-place transpose */ 756f2501298SSatish Balay PetscFree(a->a); 757f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 758f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 759f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 760f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 761f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 762f2501298SSatish Balay PetscFree(a); 763f09e8eb9SSatish Balay PetscMemcpy(A,C,sizeof(struct _p_Mat)); 764f2501298SSatish Balay PetscHeaderDestroy(C); 765f2501298SSatish Balay } 766*3a40ed3dSBarry Smith PetscFunctionReturn(0); 767f2501298SSatish Balay } 768f2501298SSatish Balay 769f2501298SSatish Balay 7705615d1e5SSatish Balay #undef __FUNC__ 7715615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 7728f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 773584200bdSSatish Balay { 774584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 775584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 776a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 77743ee02c3SBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 778584200bdSSatish Balay Scalar *aa = a->a, *ap; 779584200bdSSatish Balay 780*3a40ed3dSBarry Smith PetscFunctionBegin; 781*3a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 782584200bdSSatish Balay 78343ee02c3SBarry Smith if (m) rmax = ailen[0]; 784584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 785584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 786584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 787d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 788584200bdSSatish Balay if (fshift) { 789a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 790584200bdSSatish Balay N = ailen[i]; 791584200bdSSatish Balay for ( j=0; j<N; j++ ) { 792584200bdSSatish Balay ip[j-fshift] = ip[j]; 7937e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 794584200bdSSatish Balay } 795584200bdSSatish Balay } 796584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 797584200bdSSatish Balay } 798584200bdSSatish Balay if (mbs) { 799584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 800584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 801584200bdSSatish Balay } 802584200bdSSatish Balay /* reset ilen and imax for each row */ 803584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 804584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 805584200bdSSatish Balay } 806a7c10996SSatish Balay a->nz = ai[mbs]; 807584200bdSSatish Balay 808584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 809584200bdSSatish Balay if (fshift && a->diag) { 810584200bdSSatish Balay PetscFree(a->diag); 811584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 812584200bdSSatish Balay a->diag = 0; 813584200bdSSatish Balay } 8143d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 815219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 8163d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 817584200bdSSatish Balay a->reallocs); 818d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 819e2f3b5e9SSatish Balay a->reallocs = 0; 8204e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 8214e220ebcSLois Curfman McInnes 822*3a40ed3dSBarry Smith PetscFunctionReturn(0); 823584200bdSSatish Balay } 824584200bdSSatish Balay 8255615d1e5SSatish Balay #undef __FUNC__ 8265615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 8278f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A) 8282593348eSBarry Smith { 829b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830*3a40ed3dSBarry Smith 831*3a40ed3dSBarry Smith PetscFunctionBegin; 8329df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 833*3a40ed3dSBarry Smith PetscFunctionReturn(0); 8342593348eSBarry Smith } 8352593348eSBarry Smith 8365615d1e5SSatish Balay #undef __FUNC__ 837d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" 838b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 8392593348eSBarry Smith { 8402593348eSBarry Smith Mat A = (Mat) obj; 841b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8422593348eSBarry Smith 843*3a40ed3dSBarry Smith #if defined(USE_PETSC_LOG) 8442593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 8452593348eSBarry Smith #endif 8462593348eSBarry Smith PetscFree(a->a); 8472593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 8482593348eSBarry Smith if (a->diag) PetscFree(a->diag); 8492593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 8502593348eSBarry Smith if (a->imax) PetscFree(a->imax); 8512593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 852de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 8532593348eSBarry Smith PetscFree(a); 854f2655603SLois Curfman McInnes PLogObjectDestroy(A); 855f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 856*3a40ed3dSBarry Smith PetscFunctionReturn(0); 8572593348eSBarry Smith } 8582593348eSBarry Smith 8595615d1e5SSatish Balay #undef __FUNC__ 860d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" 8618f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op) 8622593348eSBarry Smith { 863b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 864*3a40ed3dSBarry Smith 865*3a40ed3dSBarry Smith PetscFunctionBegin; 8666d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 8676d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 8686d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 869219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 8706d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 871c2653b3dSLois Curfman McInnes else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 87296854ed6SLois Curfman McInnes else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 8736d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 8746d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 875219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 8766d4a8577SBarry Smith op == MAT_SYMMETRIC || 8776d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 87890f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 879*3a40ed3dSBarry Smith op == MAT_IGNORE_OFF_PROC_ENTRIES) { 88094a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 881*3a40ed3dSBarry Smith } else if (op == MAT_NO_NEW_DIAGONALS) { 882*3a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 883*3a40ed3dSBarry Smith } else { 884*3a40ed3dSBarry Smith SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 885*3a40ed3dSBarry Smith } 886*3a40ed3dSBarry Smith PetscFunctionReturn(0); 8872593348eSBarry Smith } 8882593348eSBarry Smith 8892593348eSBarry Smith 8902593348eSBarry Smith /* -------------------------------------------------------*/ 8912593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 8922593348eSBarry Smith /* -------------------------------------------------------*/ 893b6490206SBarry Smith #include "pinclude/plapack.h" 894b6490206SBarry Smith 8955615d1e5SSatish Balay #undef __FUNC__ 8965615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 89739b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 8982593348eSBarry Smith { 899b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 90039b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 901c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 9022593348eSBarry Smith 903*3a40ed3dSBarry Smith PetscFunctionBegin; 904c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 905c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 906b6490206SBarry Smith 907119a934aSSatish Balay idx = a->j; 908119a934aSSatish Balay v = a->a; 909119a934aSSatish Balay ii = a->i; 910119a934aSSatish Balay 911119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 912119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 913119a934aSSatish Balay sum = 0.0; 914119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 9151a6a6d98SBarry Smith z[i] = sum; 916119a934aSSatish Balay } 917c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 918c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 91939b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 920*3a40ed3dSBarry Smith PetscFunctionReturn(0); 92139b95ed1SBarry Smith } 92239b95ed1SBarry Smith 9235615d1e5SSatish Balay #undef __FUNC__ 9245615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 92539b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 92639b95ed1SBarry Smith { 92739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 92839b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 92939b95ed1SBarry Smith register Scalar x1,x2; 930*3a40ed3dSBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 93139b95ed1SBarry Smith 932*3a40ed3dSBarry Smith PetscFunctionBegin; 933c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 934c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 93539b95ed1SBarry Smith 93639b95ed1SBarry Smith idx = a->j; 93739b95ed1SBarry Smith v = a->a; 93839b95ed1SBarry Smith ii = a->i; 93939b95ed1SBarry Smith 940119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 941119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 942119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 943119a934aSSatish Balay for ( j=0; j<n; j++ ) { 944119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 945119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 946119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 947119a934aSSatish Balay v += 4; 948119a934aSSatish Balay } 9491a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 950119a934aSSatish Balay z += 2; 951119a934aSSatish Balay } 952c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 953c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 95439b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 955*3a40ed3dSBarry Smith PetscFunctionReturn(0); 95639b95ed1SBarry Smith } 95739b95ed1SBarry Smith 9585615d1e5SSatish Balay #undef __FUNC__ 9595615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 96039b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 96139b95ed1SBarry Smith { 96239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 96339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 964c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 96539b95ed1SBarry Smith 966*3a40ed3dSBarry Smith PetscFunctionBegin; 967c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 968c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 96939b95ed1SBarry Smith 97039b95ed1SBarry Smith idx = a->j; 97139b95ed1SBarry Smith v = a->a; 97239b95ed1SBarry Smith ii = a->i; 97339b95ed1SBarry Smith 974119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 975119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 976119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 977119a934aSSatish Balay for ( j=0; j<n; j++ ) { 978119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 979119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 980119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 981119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 982119a934aSSatish Balay v += 9; 983119a934aSSatish Balay } 9841a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 985119a934aSSatish Balay z += 3; 986119a934aSSatish Balay } 987c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 988c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 98939b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 990*3a40ed3dSBarry Smith PetscFunctionReturn(0); 99139b95ed1SBarry Smith } 99239b95ed1SBarry Smith 9935615d1e5SSatish Balay #undef __FUNC__ 9945615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 99539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 99639b95ed1SBarry Smith { 99739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 998*3a40ed3dSBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 999*3a40ed3dSBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 100039b95ed1SBarry Smith 1001*3a40ed3dSBarry Smith PetscFunctionBegin; 1002c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1003c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 100439b95ed1SBarry Smith 100539b95ed1SBarry Smith idx = a->j; 100639b95ed1SBarry Smith v = a->a; 100739b95ed1SBarry Smith ii = a->i; 100839b95ed1SBarry Smith 1009119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1010119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1011119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 1012119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1013119a934aSSatish Balay xb = x + 4*(*idx++); 1014119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1015119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1016119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1017119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1018119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1019119a934aSSatish Balay v += 16; 1020119a934aSSatish Balay } 10211a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1022119a934aSSatish Balay z += 4; 1023119a934aSSatish Balay } 1024c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1025c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 102639b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 1027*3a40ed3dSBarry Smith PetscFunctionReturn(0); 102839b95ed1SBarry Smith } 102939b95ed1SBarry Smith 10305615d1e5SSatish Balay #undef __FUNC__ 10315615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 103239b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 103339b95ed1SBarry Smith { 103439b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1035*3a40ed3dSBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1036c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 103739b95ed1SBarry Smith 1038c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1039c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 104039b95ed1SBarry Smith 104139b95ed1SBarry Smith idx = a->j; 104239b95ed1SBarry Smith v = a->a; 104339b95ed1SBarry Smith ii = a->i; 104439b95ed1SBarry Smith 1045119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1046119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1047119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1048119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1049119a934aSSatish Balay xb = x + 5*(*idx++); 1050119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1051119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1052119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1053119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1054119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1055119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1056119a934aSSatish Balay v += 25; 1057119a934aSSatish Balay } 10581a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1059119a934aSSatish Balay z += 5; 1060119a934aSSatish Balay } 1061c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1062c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 106339b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 1064*3a40ed3dSBarry Smith PetscFunctionReturn(0); 106539b95ed1SBarry Smith } 106639b95ed1SBarry Smith 10675615d1e5SSatish Balay #undef __FUNC__ 106848e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7" 106948e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 107048e9ddb2SSatish Balay { 107148e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 107248e9ddb2SSatish Balay register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 107348e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 107448e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 107548e9ddb2SSatish Balay 107648e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 107748e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 107848e9ddb2SSatish Balay 107948e9ddb2SSatish Balay idx = a->j; 108048e9ddb2SSatish Balay v = a->a; 108148e9ddb2SSatish Balay ii = a->i; 108248e9ddb2SSatish Balay 108348e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 108448e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 108548e9ddb2SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 108648e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 108748e9ddb2SSatish Balay xb = x + 7*(*idx++); 108848e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 108948e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 109048e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 109148e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 109248e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 109348e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 109448e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 109548e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 109648e9ddb2SSatish Balay v += 49; 109748e9ddb2SSatish Balay } 109848e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 109948e9ddb2SSatish Balay z += 7; 110048e9ddb2SSatish Balay } 110148e9ddb2SSatish Balay 110248e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 110348e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 110448e9ddb2SSatish Balay PLogFlops(98*a->nz - a->m); 1105*3a40ed3dSBarry Smith PetscFunctionReturn(0); 110648e9ddb2SSatish Balay } 110748e9ddb2SSatish Balay 110848e9ddb2SSatish Balay #undef __FUNC__ 11095615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 111039b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 111139b95ed1SBarry Smith { 111239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 111339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 1114*3a40ed3dSBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1115c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1116c16cb8f2SBarry Smith int _One = 1,ncols,k; 111739b95ed1SBarry Smith 1118*3a40ed3dSBarry Smith PetscFunctionBegin; 1119c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1120c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 112139b95ed1SBarry Smith 112239b95ed1SBarry Smith idx = a->j; 112339b95ed1SBarry Smith v = a->a; 112439b95ed1SBarry Smith ii = a->i; 112539b95ed1SBarry Smith 112639b95ed1SBarry Smith 1127119a934aSSatish Balay if (!a->mult_work) { 11283b547af2SSatish Balay k = PetscMax(a->m,a->n); 11292b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1130119a934aSSatish Balay } 1131119a934aSSatish Balay work = a->mult_work; 1132119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1133119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1134119a934aSSatish Balay ncols = n*bs; 1135119a934aSSatish Balay workt = work; 1136119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1137119a934aSSatish Balay xb = x + bs*(*idx++); 1138119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1139119a934aSSatish Balay workt += bs; 1140119a934aSSatish Balay } 11411a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1142119a934aSSatish Balay v += n*bs2; 1143119a934aSSatish Balay z += bs; 1144119a934aSSatish Balay } 1145c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1146c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 11471a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 1148*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1149bb42667fSSatish Balay } 1150bb42667fSSatish Balay 11515615d1e5SSatish Balay #undef __FUNC__ 11525615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1153f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1154f44d3a6dSSatish Balay { 1155f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1156f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 1157c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 1158f44d3a6dSSatish Balay 1159*3a40ed3dSBarry Smith PetscFunctionBegin; 1160c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1161c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1162c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1163f44d3a6dSSatish Balay 1164f44d3a6dSSatish Balay idx = a->j; 1165f44d3a6dSSatish Balay v = a->a; 1166f44d3a6dSSatish Balay ii = a->i; 1167f44d3a6dSSatish Balay 1168f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1169f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1170f44d3a6dSSatish Balay sum = y[i]; 1171f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 1172f44d3a6dSSatish Balay z[i] = sum; 1173f44d3a6dSSatish Balay } 1174c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1175c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1176c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1177f44d3a6dSSatish Balay PLogFlops(2*a->nz); 1178*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1179f44d3a6dSSatish Balay } 1180f44d3a6dSSatish Balay 11815615d1e5SSatish Balay #undef __FUNC__ 11825615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1183f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1184f44d3a6dSSatish Balay { 1185f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1186f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1187f44d3a6dSSatish Balay register Scalar x1,x2; 1188*3a40ed3dSBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1189f44d3a6dSSatish Balay 1190*3a40ed3dSBarry Smith PetscFunctionBegin; 1191c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1192c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1193c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1194f44d3a6dSSatish Balay 1195f44d3a6dSSatish Balay idx = a->j; 1196f44d3a6dSSatish Balay v = a->a; 1197f44d3a6dSSatish Balay ii = a->i; 1198f44d3a6dSSatish Balay 1199f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1200f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1201f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 1202f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1203f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1204f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 1205f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 1206f44d3a6dSSatish Balay v += 4; 1207f44d3a6dSSatish Balay } 1208f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 1209f44d3a6dSSatish Balay z += 2; y += 2; 1210f44d3a6dSSatish Balay } 1211c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1212c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1213c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1214f44d3a6dSSatish Balay PLogFlops(4*a->nz); 1215*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1216f44d3a6dSSatish Balay } 1217f44d3a6dSSatish Balay 12185615d1e5SSatish Balay #undef __FUNC__ 12195615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1220f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1221f44d3a6dSSatish Balay { 1222f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1223f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1224c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1225f44d3a6dSSatish Balay 1226*3a40ed3dSBarry Smith PetscFunctionBegin; 1227c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1228c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1229c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1230f44d3a6dSSatish Balay 1231f44d3a6dSSatish Balay idx = a->j; 1232f44d3a6dSSatish Balay v = a->a; 1233f44d3a6dSSatish Balay ii = a->i; 1234f44d3a6dSSatish Balay 1235f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1236f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1237f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1238f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1239f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1240f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1241f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1242f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1243f44d3a6dSSatish Balay v += 9; 1244f44d3a6dSSatish Balay } 1245f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1246f44d3a6dSSatish Balay z += 3; y += 3; 1247f44d3a6dSSatish Balay } 1248c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1249c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1250c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1251f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1252*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1253f44d3a6dSSatish Balay } 1254f44d3a6dSSatish Balay 12555615d1e5SSatish Balay #undef __FUNC__ 12565615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1257f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1258f44d3a6dSSatish Balay { 1259f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1260*3a40ed3dSBarry Smith register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 1261f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1262c16cb8f2SBarry Smith int j,n; 1263f44d3a6dSSatish Balay 1264*3a40ed3dSBarry Smith PetscFunctionBegin; 1265c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1266c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1267c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1268f44d3a6dSSatish Balay 1269f44d3a6dSSatish Balay idx = a->j; 1270f44d3a6dSSatish Balay v = a->a; 1271f44d3a6dSSatish Balay ii = a->i; 1272f44d3a6dSSatish Balay 1273f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1274f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1275f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1276f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1277f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1278f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1279f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1280f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1281f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1282f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1283f44d3a6dSSatish Balay v += 16; 1284f44d3a6dSSatish Balay } 1285f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1286f44d3a6dSSatish Balay z += 4; y += 4; 1287f44d3a6dSSatish Balay } 1288c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1289c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1290c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1291f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1292*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1293f44d3a6dSSatish Balay } 1294f44d3a6dSSatish Balay 12955615d1e5SSatish Balay #undef __FUNC__ 12965615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1297f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1298f44d3a6dSSatish Balay { 1299f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1300*3a40ed3dSBarry Smith register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1301c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1302f44d3a6dSSatish Balay 1303*3a40ed3dSBarry Smith PetscFunctionBegin; 1304c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1305c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1306c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1307f44d3a6dSSatish Balay 1308f44d3a6dSSatish Balay idx = a->j; 1309f44d3a6dSSatish Balay v = a->a; 1310f44d3a6dSSatish Balay ii = a->i; 1311f44d3a6dSSatish Balay 1312f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1313f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1314f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1315f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1316f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1317f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1318f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1319f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1320f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1321f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1322f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1323f44d3a6dSSatish Balay v += 25; 1324f44d3a6dSSatish Balay } 1325f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1326f44d3a6dSSatish Balay z += 5; y += 5; 1327f44d3a6dSSatish Balay } 1328c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1329c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1330c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1331f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1332*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1333f44d3a6dSSatish Balay } 1334f44d3a6dSSatish Balay 13355615d1e5SSatish Balay #undef __FUNC__ 133648e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 133748e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 133848e9ddb2SSatish Balay { 133948e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 134048e9ddb2SSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 134148e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 134248e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 134348e9ddb2SSatish Balay 1344*3a40ed3dSBarry Smith PetscFunctionBegin; 134548e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 134648e9ddb2SSatish Balay VecGetArray_Fast(yy,y); 134748e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 134848e9ddb2SSatish Balay 134948e9ddb2SSatish Balay idx = a->j; 135048e9ddb2SSatish Balay v = a->a; 135148e9ddb2SSatish Balay ii = a->i; 135248e9ddb2SSatish Balay 135348e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 135448e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 135548e9ddb2SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 135648e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 135748e9ddb2SSatish Balay xb = x + 7*(*idx++); 135848e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 135948e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 136048e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 136148e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 136248e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 136348e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 136448e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 136548e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 136648e9ddb2SSatish Balay v += 49; 136748e9ddb2SSatish Balay } 136848e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 136948e9ddb2SSatish Balay z += 7; y += 7; 137048e9ddb2SSatish Balay } 137148e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 137248e9ddb2SSatish Balay VecRestoreArray_Fast(yy,y); 137348e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 137448e9ddb2SSatish Balay PLogFlops(98*a->nz); 1375*3a40ed3dSBarry Smith PetscFunctionReturn(0); 137648e9ddb2SSatish Balay } 137748e9ddb2SSatish Balay 137848e9ddb2SSatish Balay 137948e9ddb2SSatish Balay #undef __FUNC__ 13805615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1381f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1382f44d3a6dSSatish Balay { 1383f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1384f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1385f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1386f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1387f44d3a6dSSatish Balay 1388*3a40ed3dSBarry Smith PetscFunctionBegin; 1389f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1390f44d3a6dSSatish Balay 1391c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1392c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1393f44d3a6dSSatish Balay 1394f44d3a6dSSatish Balay idx = a->j; 1395f44d3a6dSSatish Balay v = a->a; 1396f44d3a6dSSatish Balay ii = a->i; 1397f44d3a6dSSatish Balay 1398f44d3a6dSSatish Balay 1399f44d3a6dSSatish Balay if (!a->mult_work) { 1400f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1401f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1402f44d3a6dSSatish Balay } 1403f44d3a6dSSatish Balay work = a->mult_work; 1404f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1405f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1406f44d3a6dSSatish Balay ncols = n*bs; 1407f44d3a6dSSatish Balay workt = work; 1408f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1409f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1410f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1411f44d3a6dSSatish Balay workt += bs; 1412f44d3a6dSSatish Balay } 1413f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1414f44d3a6dSSatish Balay v += n*bs2; 1415f44d3a6dSSatish Balay z += bs; 1416f44d3a6dSSatish Balay } 1417c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1418c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1419f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1420*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1421f44d3a6dSSatish Balay } 1422f44d3a6dSSatish Balay 14235615d1e5SSatish Balay #undef __FUNC__ 14245615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 14258f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1426bb42667fSSatish Balay { 1427bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14281a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1429bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1430bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 14319df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1432119a934aSSatish Balay 1433119a934aSSatish Balay 1434*3a40ed3dSBarry Smith PetscFunctionBegin; 143590f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 143690f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1437bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1438bb42667fSSatish Balay 1439119a934aSSatish Balay idx = a->j; 1440119a934aSSatish Balay v = a->a; 1441119a934aSSatish Balay ii = a->i; 1442119a934aSSatish Balay 1443119a934aSSatish Balay switch (bs) { 1444119a934aSSatish Balay case 1: 1445119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1446119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1447119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1448119a934aSSatish Balay ib = idx + ai[i]; 1449119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1450bb1453f0SSatish Balay rval = ib[j]; 1451bb1453f0SSatish Balay z[rval] += *v++ * x1; 1452119a934aSSatish Balay } 1453119a934aSSatish Balay } 1454119a934aSSatish Balay break; 1455119a934aSSatish Balay case 2: 1456119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1457119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1458119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1459119a934aSSatish Balay ib = idx + ai[i]; 1460119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1461119a934aSSatish Balay rval = ib[j]*2; 1462bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1463bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1464119a934aSSatish Balay v += 4; 1465119a934aSSatish Balay } 1466119a934aSSatish Balay } 1467119a934aSSatish Balay break; 1468119a934aSSatish Balay case 3: 1469119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1470119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1471119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1472119a934aSSatish Balay ib = idx + ai[i]; 1473119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1474119a934aSSatish Balay rval = ib[j]*3; 1475bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1476bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1477bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1478119a934aSSatish Balay v += 9; 1479119a934aSSatish Balay } 1480119a934aSSatish Balay } 1481119a934aSSatish Balay break; 1482119a934aSSatish Balay case 4: 1483119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1484119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1485119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1486119a934aSSatish Balay ib = idx + ai[i]; 1487119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1488119a934aSSatish Balay rval = ib[j]*4; 1489bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1490bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1491bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1492bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1493119a934aSSatish Balay v += 16; 1494119a934aSSatish Balay } 1495119a934aSSatish Balay } 1496119a934aSSatish Balay break; 1497119a934aSSatish Balay case 5: 1498119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1499119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1500119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1501119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1502119a934aSSatish Balay ib = idx + ai[i]; 1503119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1504119a934aSSatish Balay rval = ib[j]*5; 1505bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1506bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1507bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1508bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1509bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1510119a934aSSatish Balay v += 25; 1511119a934aSSatish Balay } 1512119a934aSSatish Balay } 1513119a934aSSatish Balay break; 1514119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1515119a934aSSatish Balay default: { 1516119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1517119a934aSSatish Balay if (!a->mult_work) { 15183b547af2SSatish Balay k = PetscMax(a->m,a->n); 1519bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1520119a934aSSatish Balay CHKPTRQ(a->mult_work); 1521119a934aSSatish Balay } 1522119a934aSSatish Balay work = a->mult_work; 1523119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1524119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1525119a934aSSatish Balay ncols = n*bs; 1526119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1527119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1528119a934aSSatish Balay v += n*bs2; 1529119a934aSSatish Balay x += bs; 1530119a934aSSatish Balay workt = work; 1531119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1532119a934aSSatish Balay zb = z + bs*(*idx++); 1533bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1534119a934aSSatish Balay workt += bs; 1535119a934aSSatish Balay } 1536119a934aSSatish Balay } 1537119a934aSSatish Balay } 1538119a934aSSatish Balay } 15390419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 15400419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1541faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1542*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1543faf6580fSSatish Balay } 15441c351548SSatish Balay 15455615d1e5SSatish Balay #undef __FUNC__ 15465615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 15478f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1548faf6580fSSatish Balay { 1549faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1550faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1551faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1552faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1553faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1554faf6580fSSatish Balay 1555*3a40ed3dSBarry Smith PetscFunctionBegin; 155690f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 155790f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1558faf6580fSSatish Balay 1559648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1560648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1561648c1d95SSatish Balay 1562faf6580fSSatish Balay idx = a->j; 1563faf6580fSSatish Balay v = a->a; 1564faf6580fSSatish Balay ii = a->i; 1565faf6580fSSatish Balay 1566faf6580fSSatish Balay switch (bs) { 1567faf6580fSSatish Balay case 1: 1568faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1569faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1570faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1571faf6580fSSatish Balay ib = idx + ai[i]; 1572faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1573faf6580fSSatish Balay rval = ib[j]; 1574faf6580fSSatish Balay z[rval] += *v++ * x1; 1575faf6580fSSatish Balay } 1576faf6580fSSatish Balay } 1577faf6580fSSatish Balay break; 1578faf6580fSSatish Balay case 2: 1579faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1580faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1581faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1582faf6580fSSatish Balay ib = idx + ai[i]; 1583faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1584faf6580fSSatish Balay rval = ib[j]*2; 1585faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1586faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1587faf6580fSSatish Balay v += 4; 1588faf6580fSSatish Balay } 1589faf6580fSSatish Balay } 1590faf6580fSSatish Balay break; 1591faf6580fSSatish Balay case 3: 1592faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1593faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1594faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1595faf6580fSSatish Balay ib = idx + ai[i]; 1596faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1597faf6580fSSatish Balay rval = ib[j]*3; 1598faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1599faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1600faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1601faf6580fSSatish Balay v += 9; 1602faf6580fSSatish Balay } 1603faf6580fSSatish Balay } 1604faf6580fSSatish Balay break; 1605faf6580fSSatish Balay case 4: 1606faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1607faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1608faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1609faf6580fSSatish Balay ib = idx + ai[i]; 1610faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1611faf6580fSSatish Balay rval = ib[j]*4; 1612faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1613faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1614faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1615faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1616faf6580fSSatish Balay v += 16; 1617faf6580fSSatish Balay } 1618faf6580fSSatish Balay } 1619faf6580fSSatish Balay break; 1620faf6580fSSatish Balay case 5: 1621faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1622faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1623faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1624faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1625faf6580fSSatish Balay ib = idx + ai[i]; 1626faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1627faf6580fSSatish Balay rval = ib[j]*5; 1628faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1629faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1630faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1631faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1632faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1633faf6580fSSatish Balay v += 25; 1634faf6580fSSatish Balay } 1635faf6580fSSatish Balay } 1636faf6580fSSatish Balay break; 1637faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1638faf6580fSSatish Balay default: { 1639faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1640faf6580fSSatish Balay if (!a->mult_work) { 1641faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1642faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1643faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1644faf6580fSSatish Balay } 1645faf6580fSSatish Balay work = a->mult_work; 1646faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1647faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1648faf6580fSSatish Balay ncols = n*bs; 1649faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1650faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1651faf6580fSSatish Balay v += n*bs2; 1652faf6580fSSatish Balay x += bs; 1653faf6580fSSatish Balay workt = work; 1654faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1655faf6580fSSatish Balay zb = z + bs*(*idx++); 1656faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1657faf6580fSSatish Balay workt += bs; 1658faf6580fSSatish Balay } 1659faf6580fSSatish Balay } 1660faf6580fSSatish Balay } 1661faf6580fSSatish Balay } 1662faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1663faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1664faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 1665*3a40ed3dSBarry Smith PetscFunctionReturn(0); 16662593348eSBarry Smith } 16672593348eSBarry Smith 16685615d1e5SSatish Balay #undef __FUNC__ 1669d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" 16708f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 16712593348eSBarry Smith { 1672b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16734e220ebcSLois Curfman McInnes 1674*3a40ed3dSBarry Smith PetscFunctionBegin; 16754e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 16764e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 16774e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 16784e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 16794e220ebcSLois Curfman McInnes info->block_size = a->bs2; 16804e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 16814e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 16824e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 16834e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 16844e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 16854e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 16864e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 16874e220ebcSLois Curfman McInnes info->memory = A->mem; 16884e220ebcSLois Curfman McInnes if (A->factor) { 16894e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 16904e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 16914e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 16924e220ebcSLois Curfman McInnes } else { 16934e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 16944e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16954e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16964e220ebcSLois Curfman McInnes } 1697*3a40ed3dSBarry Smith PetscFunctionReturn(0); 16982593348eSBarry Smith } 16992593348eSBarry Smith 17005615d1e5SSatish Balay #undef __FUNC__ 17015615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 17028f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 170391d316f6SSatish Balay { 170491d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 170591d316f6SSatish Balay 1706*3a40ed3dSBarry Smith PetscFunctionBegin; 1707e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 170891d316f6SSatish Balay 170991d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1710*3a40ed3dSBarry Smith if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1711*3a40ed3dSBarry Smith *flg = PETSC_FALSE; PetscFunctionReturn(0); 171291d316f6SSatish Balay } 171391d316f6SSatish Balay 171491d316f6SSatish Balay /* if the a->i are the same */ 171591d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1716*3a40ed3dSBarry Smith *flg = PETSC_FALSE; PetscFunctionReturn(0); 171791d316f6SSatish Balay } 171891d316f6SSatish Balay 171991d316f6SSatish Balay /* if a->j are the same */ 172091d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1721*3a40ed3dSBarry Smith *flg = PETSC_FALSE; PetscFunctionReturn(0); 172291d316f6SSatish Balay } 172391d316f6SSatish Balay 172491d316f6SSatish Balay /* if a->a are the same */ 172591d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1726*3a40ed3dSBarry Smith *flg = PETSC_FALSE; PetscFunctionReturn(0); 172791d316f6SSatish Balay } 172891d316f6SSatish Balay *flg = PETSC_TRUE; 1729*3a40ed3dSBarry Smith PetscFunctionReturn(0); 173091d316f6SSatish Balay 173191d316f6SSatish Balay } 173291d316f6SSatish Balay 17335615d1e5SSatish Balay #undef __FUNC__ 17345615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 17358f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 173691d316f6SSatish Balay { 173791d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17387e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 173917e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 174017e48fc4SSatish Balay 1741*3a40ed3dSBarry Smith PetscFunctionBegin; 17420513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 174317e48fc4SSatish Balay bs = a->bs; 174417e48fc4SSatish Balay aa = a->a; 174517e48fc4SSatish Balay ai = a->i; 174617e48fc4SSatish Balay aj = a->j; 174717e48fc4SSatish Balay ambs = a->mbs; 17489df24120SSatish Balay bs2 = a->bs2; 174991d316f6SSatish Balay 175091d316f6SSatish Balay VecSet(&zero,v); 175190f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1752e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 175317e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 175417e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 175517e48fc4SSatish Balay if (aj[j] == i) { 175617e48fc4SSatish Balay row = i*bs; 17577e67e3f9SSatish Balay aa_j = aa+j*bs2; 17587e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 175991d316f6SSatish Balay break; 176091d316f6SSatish Balay } 176191d316f6SSatish Balay } 176291d316f6SSatish Balay } 1763*3a40ed3dSBarry Smith PetscFunctionReturn(0); 176491d316f6SSatish Balay } 176591d316f6SSatish Balay 17665615d1e5SSatish Balay #undef __FUNC__ 17675615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 17688f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 17699867e207SSatish Balay { 17709867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17719867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 17727e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 17739867e207SSatish Balay 1774*3a40ed3dSBarry Smith PetscFunctionBegin; 17759867e207SSatish Balay ai = a->i; 17769867e207SSatish Balay aj = a->j; 17779867e207SSatish Balay aa = a->a; 17789867e207SSatish Balay m = a->m; 17799867e207SSatish Balay n = a->n; 17809867e207SSatish Balay bs = a->bs; 17819867e207SSatish Balay mbs = a->mbs; 17829df24120SSatish Balay bs2 = a->bs2; 17839867e207SSatish Balay if (ll) { 178490f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1785e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 17869867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17879867e207SSatish Balay M = ai[i+1] - ai[i]; 17889867e207SSatish Balay li = l + i*bs; 17897e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17909867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17917e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 17929867e207SSatish Balay (*v++) *= li[k%bs]; 17939867e207SSatish Balay } 17949867e207SSatish Balay } 17959867e207SSatish Balay } 17969867e207SSatish Balay } 17979867e207SSatish Balay 17989867e207SSatish Balay if (rr) { 179990f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1800e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 18019867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 18029867e207SSatish Balay M = ai[i+1] - ai[i]; 18037e67e3f9SSatish Balay v = aa + bs2*ai[i]; 18049867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 18059867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 18069867e207SSatish Balay for ( k=0; k<bs; k++ ) { 18079867e207SSatish Balay x = ri[k]; 18089867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 18099867e207SSatish Balay } 18109867e207SSatish Balay } 18119867e207SSatish Balay } 18129867e207SSatish Balay } 1813*3a40ed3dSBarry Smith PetscFunctionReturn(0); 18149867e207SSatish Balay } 18159867e207SSatish Balay 18169867e207SSatish Balay 1817b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1818b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 18192a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1820736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1821736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 18221a6a6d98SBarry Smith 18231a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 18241a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 18251a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 18261a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 18271a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 18281a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 182948e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 18301a6a6d98SBarry Smith 18317fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 18327fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 18337fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 18347fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 18357fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 18367fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 18372593348eSBarry Smith 18385615d1e5SSatish Balay #undef __FUNC__ 18395615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 18408f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 18412593348eSBarry Smith { 1842b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18432593348eSBarry Smith Scalar *v = a->a; 18442593348eSBarry Smith double sum = 0.0; 18459df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 18462593348eSBarry Smith 1847*3a40ed3dSBarry Smith PetscFunctionBegin; 18482593348eSBarry Smith if (type == NORM_FROBENIUS) { 18490419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 1850*3a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX) 18512593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 18522593348eSBarry Smith #else 18532593348eSBarry Smith sum += (*v)*(*v); v++; 18542593348eSBarry Smith #endif 18552593348eSBarry Smith } 18562593348eSBarry Smith *norm = sqrt(sum); 18572593348eSBarry Smith } 18582593348eSBarry Smith else { 1859e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 18602593348eSBarry Smith } 1861*3a40ed3dSBarry Smith PetscFunctionReturn(0); 18622593348eSBarry Smith } 18632593348eSBarry Smith 18643eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 18652593348eSBarry Smith /* 18662593348eSBarry Smith note: This can only work for identity for row and col. It would 18672593348eSBarry Smith be good to check this and otherwise generate an error. 18682593348eSBarry Smith */ 18695615d1e5SSatish Balay #undef __FUNC__ 18705615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 18718f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 18722593348eSBarry Smith { 1873b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18742593348eSBarry Smith Mat outA; 1875de6a44a3SBarry Smith int ierr; 18762593348eSBarry Smith 1877*3a40ed3dSBarry Smith PetscFunctionBegin; 1878e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 18792593348eSBarry Smith 18802593348eSBarry Smith outA = inA; 18812593348eSBarry Smith inA->factor = FACTOR_LU; 18822593348eSBarry Smith a->row = row; 18832593348eSBarry Smith a->col = col; 18842593348eSBarry Smith 1885eed86810SBarry Smith if (!a->solve_work) { 1886de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1887eed86810SBarry Smith PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1888eed86810SBarry Smith } 18892593348eSBarry Smith 18902593348eSBarry Smith if (!a->diag) { 1891b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 18922593348eSBarry Smith } 18937fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 18943eee16b0SBarry Smith 18953eee16b0SBarry Smith /* 18963eee16b0SBarry Smith Blocksize 4 has a special faster solver for ILU(0) factorization 18973eee16b0SBarry Smith with natural ordering 18983eee16b0SBarry Smith */ 18993eee16b0SBarry Smith if (a->bs == 4) { 19003eee16b0SBarry Smith inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 19013eee16b0SBarry Smith } 19023eee16b0SBarry Smith 1903*3a40ed3dSBarry Smith PetscFunctionReturn(0); 19042593348eSBarry Smith } 19052593348eSBarry Smith 19065615d1e5SSatish Balay #undef __FUNC__ 19075615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 19088f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 19092593348eSBarry Smith { 1910b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 19119df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1912*3a40ed3dSBarry Smith 1913*3a40ed3dSBarry Smith PetscFunctionBegin; 1914b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1915b6490206SBarry Smith PLogFlops(totalnz); 1916*3a40ed3dSBarry Smith PetscFunctionReturn(0); 19172593348eSBarry Smith } 19182593348eSBarry Smith 19195615d1e5SSatish Balay #undef __FUNC__ 19205615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 19218f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 19227e67e3f9SSatish Balay { 19237e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 19247e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1925a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 19269df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 19277e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 19287e67e3f9SSatish Balay 1929*3a40ed3dSBarry Smith PetscFunctionBegin; 19307e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 19317e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1932e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1933e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1934a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 19357e67e3f9SSatish Balay nrow = ailen[brow]; 19367e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1937e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1938e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1939a7c10996SSatish Balay col = in[l] ; 19407e67e3f9SSatish Balay bcol = col/bs; 19417e67e3f9SSatish Balay cidx = col%bs; 19427e67e3f9SSatish Balay ridx = row%bs; 19437e67e3f9SSatish Balay high = nrow; 19447e67e3f9SSatish Balay low = 0; /* assume unsorted */ 19457e67e3f9SSatish Balay while (high-low > 5) { 19467e67e3f9SSatish Balay t = (low+high)/2; 19477e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 19487e67e3f9SSatish Balay else low = t; 19497e67e3f9SSatish Balay } 19507e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 19517e67e3f9SSatish Balay if (rp[i] > bcol) break; 19527e67e3f9SSatish Balay if (rp[i] == bcol) { 19537e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 19547e67e3f9SSatish Balay goto finished; 19557e67e3f9SSatish Balay } 19567e67e3f9SSatish Balay } 19577e67e3f9SSatish Balay *v++ = zero; 19587e67e3f9SSatish Balay finished:; 19597e67e3f9SSatish Balay } 19607e67e3f9SSatish Balay } 1961*3a40ed3dSBarry Smith PetscFunctionReturn(0); 19627e67e3f9SSatish Balay } 19637e67e3f9SSatish Balay 19645615d1e5SSatish Balay #undef __FUNC__ 1965d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 19668f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 19675a838052SSatish Balay { 19685a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1969*3a40ed3dSBarry Smith 1970*3a40ed3dSBarry Smith PetscFunctionBegin; 19715a838052SSatish Balay *bs = baij->bs; 1972*3a40ed3dSBarry Smith PetscFunctionReturn(0); 19735a838052SSatish Balay } 19745a838052SSatish Balay 1975d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 19765615d1e5SSatish Balay #undef __FUNC__ 19775615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1978d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1979d9b7c43dSSatish Balay { 1980d9b7c43dSSatish Balay int i,row; 1981*3a40ed3dSBarry Smith 1982*3a40ed3dSBarry Smith PetscFunctionBegin; 1983d9b7c43dSSatish Balay row = idx[0]; 1984*3a40ed3dSBarry Smith if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1985d9b7c43dSSatish Balay 1986d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1987*3a40ed3dSBarry Smith if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1988d9b7c43dSSatish Balay } 1989d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1990*3a40ed3dSBarry Smith PetscFunctionReturn(0); 1991d9b7c43dSSatish Balay } 1992d9b7c43dSSatish Balay 19935615d1e5SSatish Balay #undef __FUNC__ 19945615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 19958f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1996d9b7c43dSSatish Balay { 1997d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1998d9b7c43dSSatish Balay IS is_local; 1999d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 2000d9b7c43dSSatish Balay PetscTruth flg; 2001d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 2002d9b7c43dSSatish Balay 2003*3a40ed3dSBarry Smith PetscFunctionBegin; 2004d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2005d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 2006d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 2007537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 2008d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 2009d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 2010d9b7c43dSSatish Balay 2011d9b7c43dSSatish Balay i = 0; 2012d9b7c43dSSatish Balay while (i < is_n) { 2013e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 2014d9b7c43dSSatish Balay flg = PETSC_FALSE; 2015d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 2016d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 2017d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 2018d9b7c43dSSatish Balay i += bs; 2019d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 2020d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 2021d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 2022d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 2023d9b7c43dSSatish Balay aa[0] = zero; 2024d9b7c43dSSatish Balay aa+=bs; 2025d9b7c43dSSatish Balay } 2026d9b7c43dSSatish Balay i++; 2027d9b7c43dSSatish Balay } 2028d9b7c43dSSatish Balay } 2029d9b7c43dSSatish Balay if (diag) { 2030d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 2031d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 2032d9b7c43dSSatish Balay } 2033d9b7c43dSSatish Balay } 2034d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2035d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2036d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 20379a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2038d9b7c43dSSatish Balay 2039*3a40ed3dSBarry Smith PetscFunctionReturn(0); 2040d9b7c43dSSatish Balay } 20411c351548SSatish Balay 20425615d1e5SSatish Balay #undef __FUNC__ 2043d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2044ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 2045ba4ca20aSSatish Balay { 2046ba4ca20aSSatish Balay static int called = 0; 2047ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 2048ba4ca20aSSatish Balay 2049*3a40ed3dSBarry Smith PetscFunctionBegin; 2050*3a40ed3dSBarry Smith if (called) {PetscFunctionReturn(0);} else called = 1; 2051ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2052ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 2053*3a40ed3dSBarry Smith PetscFunctionReturn(0); 2054ba4ca20aSSatish Balay } 2055d9b7c43dSSatish Balay 20562593348eSBarry Smith /* -------------------------------------------------------------------*/ 2057cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 20589867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2059f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2060faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 20611a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 2062b6490206SBarry Smith 0,0, 2063de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 2064b6490206SBarry Smith 0, 2065f2501298SSatish Balay MatTranspose_SeqBAIJ, 206617e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 20679867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2068584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 2069b6490206SBarry Smith 0, 2070d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 20717fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2072b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2073de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 2074d402145bSBarry Smith 0,0, 2075b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 2076b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 2077af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 20787e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 2079ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 20803b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 20813b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 208292c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 208392c4ed94SBarry Smith 0,0,0,0,0,0, 208492c4ed94SBarry Smith MatSetValuesBlocked_SeqBAIJ}; 20852593348eSBarry Smith 20865615d1e5SSatish Balay #undef __FUNC__ 20875615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 20882593348eSBarry Smith /*@C 208944cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 209044cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 209144cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 20922bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 20932bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 20942593348eSBarry Smith 20952593348eSBarry Smith Input Parameters: 2096029af93fSBarry Smith . comm - MPI communicator, set to PETSC_COMM_SELF 2097b6490206SBarry Smith . bs - size of block 20982593348eSBarry Smith . m - number of rows 20992593348eSBarry Smith . n - number of columns 2100b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 21012bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 21022bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 21032593348eSBarry Smith 21042593348eSBarry Smith Output Parameter: 21052593348eSBarry Smith . A - the matrix 21062593348eSBarry Smith 21070513a670SBarry Smith Options Database Keys: 21080513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 21090effe34fSLois Curfman McInnes $ block calculations (much slower) 21100513a670SBarry Smith $ -mat_block_size - size of the blocks to use 21110513a670SBarry Smith 21122593348eSBarry Smith Notes: 211344cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 21142593348eSBarry Smith storage. That is, the stored row and column indices can begin at 211544cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 21162593348eSBarry Smith 21172593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 21182593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 21192593348eSBarry Smith allocation. For additional details, see the users manual chapter on 21206da5968aSLois Curfman McInnes matrices. 21212593348eSBarry Smith 212244cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 21232593348eSBarry Smith @*/ 2124b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 21252593348eSBarry Smith { 21262593348eSBarry Smith Mat B; 2127b6490206SBarry Smith Mat_SeqBAIJ *b; 21283b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 21293b2fbd54SBarry Smith 2130*3a40ed3dSBarry Smith PetscFunctionBegin; 21313b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 2132e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2133b6490206SBarry Smith 21340513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 21350513a670SBarry Smith 2136*3a40ed3dSBarry Smith if (mbs*bs!=m || nbs*bs!=n) { 2137e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2138*3a40ed3dSBarry Smith } 21392593348eSBarry Smith 21402593348eSBarry Smith *A = 0; 2141d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 21422593348eSBarry Smith PLogObjectCreate(B); 2143b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 214444cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 21452593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 21461a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 21471a6a6d98SBarry Smith if (!flg) { 21487fc0212eSBarry Smith switch (bs) { 21497fc0212eSBarry Smith case 1: 21507fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21511a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 215239b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 2153f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 21547fc0212eSBarry Smith break; 21554eeb42bcSBarry Smith case 2: 21564eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 21571a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 215839b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 2159f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 21606d84be18SBarry Smith break; 2161f327dd97SBarry Smith case 3: 2162f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 21631a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 216439b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 2165f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 21664eeb42bcSBarry Smith break; 21676d84be18SBarry Smith case 4: 21686d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 21691a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 217039b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 2171f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 21726d84be18SBarry Smith break; 21736d84be18SBarry Smith case 5: 21746d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 21751a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 217639b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 2177f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 21786d84be18SBarry Smith break; 217948e9ddb2SSatish Balay case 7: 218048e9ddb2SSatish Balay B->ops.mult = MatMult_SeqBAIJ_7; 218148e9ddb2SSatish Balay B->ops.solve = MatSolve_SeqBAIJ_7; 218248e9ddb2SSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_7; 218348e9ddb2SSatish Balay break; 21847fc0212eSBarry Smith } 21851a6a6d98SBarry Smith } 2186b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 2187b6490206SBarry Smith B->view = MatView_SeqBAIJ; 21882593348eSBarry Smith B->factor = 0; 21892593348eSBarry Smith B->lupivotthreshold = 1.0; 219090f02eecSBarry Smith B->mapping = 0; 21912593348eSBarry Smith b->row = 0; 21922593348eSBarry Smith b->col = 0; 21932593348eSBarry Smith b->reallocs = 0; 21942593348eSBarry Smith 219544cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 219644cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 2197b6490206SBarry Smith b->mbs = mbs; 2198f2501298SSatish Balay b->nbs = nbs; 2199b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 22002593348eSBarry Smith if (nnz == PETSC_NULL) { 2201119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 22022593348eSBarry Smith else if (nz <= 0) nz = 1; 2203b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2204b6490206SBarry Smith nz = nz*mbs; 2205*3a40ed3dSBarry Smith } else { 22062593348eSBarry Smith nz = 0; 2207b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 22082593348eSBarry Smith } 22092593348eSBarry Smith 22102593348eSBarry Smith /* allocate the matrix space */ 22117e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 22122593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 22137e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 22147e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 22152593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 22162593348eSBarry Smith b->i = b->j + nz; 22172593348eSBarry Smith b->singlemalloc = 1; 22182593348eSBarry Smith 2219de6a44a3SBarry Smith b->i[0] = 0; 2220b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 22212593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 22222593348eSBarry Smith } 22232593348eSBarry Smith 2224b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2225b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2226f09e8eb9SSatish Balay PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2227b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 22282593348eSBarry Smith 2229b6490206SBarry Smith b->bs = bs; 22309df24120SSatish Balay b->bs2 = bs2; 2231b6490206SBarry Smith b->mbs = mbs; 22322593348eSBarry Smith b->nz = 0; 223318e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 22342593348eSBarry Smith b->sorted = 0; 22352593348eSBarry Smith b->roworiented = 1; 22362593348eSBarry Smith b->nonew = 0; 22372593348eSBarry Smith b->diag = 0; 22382593348eSBarry Smith b->solve_work = 0; 2239de6a44a3SBarry Smith b->mult_work = 0; 22402593348eSBarry Smith b->spptr = 0; 22414e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 22424e220ebcSLois Curfman McInnes 22432593348eSBarry Smith *A = B; 22442593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 22452593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2246*3a40ed3dSBarry Smith PetscFunctionReturn(0); 22472593348eSBarry Smith } 22482593348eSBarry Smith 22495615d1e5SSatish Balay #undef __FUNC__ 22505615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2251b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 22522593348eSBarry Smith { 22532593348eSBarry Smith Mat C; 2254b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 22559df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2256de6a44a3SBarry Smith 2257*3a40ed3dSBarry Smith PetscFunctionBegin; 2258e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 22592593348eSBarry Smith 22602593348eSBarry Smith *B = 0; 2261d4bb536fSBarry Smith PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 22622593348eSBarry Smith PLogObjectCreate(C); 2263b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 22642593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2265b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 2266b6490206SBarry Smith C->view = MatView_SeqBAIJ; 22672593348eSBarry Smith C->factor = A->factor; 22682593348eSBarry Smith c->row = 0; 22692593348eSBarry Smith c->col = 0; 22702593348eSBarry Smith C->assembled = PETSC_TRUE; 22712593348eSBarry Smith 227244cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 227344cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 227444cd7ae7SLois Curfman McInnes C->M = a->m; 227544cd7ae7SLois Curfman McInnes C->N = a->n; 227644cd7ae7SLois Curfman McInnes 2277b6490206SBarry Smith c->bs = a->bs; 22789df24120SSatish Balay c->bs2 = a->bs2; 2279b6490206SBarry Smith c->mbs = a->mbs; 2280b6490206SBarry Smith c->nbs = a->nbs; 22812593348eSBarry Smith 2282b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2283b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2284b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22852593348eSBarry Smith c->imax[i] = a->imax[i]; 22862593348eSBarry Smith c->ilen[i] = a->ilen[i]; 22872593348eSBarry Smith } 22882593348eSBarry Smith 22892593348eSBarry Smith /* allocate the matrix space */ 22902593348eSBarry Smith c->singlemalloc = 1; 22917e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 22922593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 22937e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 2294de6a44a3SBarry Smith c->i = c->j + nz; 2295b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2296b6490206SBarry Smith if (mbs > 0) { 2297de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 22982593348eSBarry Smith if (cpvalues == COPY_VALUES) { 22997e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 23002593348eSBarry Smith } 23012593348eSBarry Smith } 23022593348eSBarry Smith 2303f09e8eb9SSatish Balay PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 23042593348eSBarry Smith c->sorted = a->sorted; 23052593348eSBarry Smith c->roworiented = a->roworiented; 23062593348eSBarry Smith c->nonew = a->nonew; 23072593348eSBarry Smith 23082593348eSBarry Smith if (a->diag) { 2309b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2310b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2311b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 23122593348eSBarry Smith c->diag[i] = a->diag[i]; 23132593348eSBarry Smith } 23142593348eSBarry Smith } 23152593348eSBarry Smith else c->diag = 0; 23162593348eSBarry Smith c->nz = a->nz; 23172593348eSBarry Smith c->maxnz = a->maxnz; 23182593348eSBarry Smith c->solve_work = 0; 23192593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 23207fc0212eSBarry Smith c->mult_work = 0; 23212593348eSBarry Smith *B = C; 2322*3a40ed3dSBarry Smith PetscFunctionReturn(0); 23232593348eSBarry Smith } 23242593348eSBarry Smith 23255615d1e5SSatish Balay #undef __FUNC__ 23265615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 232719bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 23282593348eSBarry Smith { 2329b6490206SBarry Smith Mat_SeqBAIJ *a; 23302593348eSBarry Smith Mat B; 2331de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2332b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 233335aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2334a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2335b6490206SBarry Smith Scalar *aa; 233619bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 23372593348eSBarry Smith 2338*3a40ed3dSBarry Smith PetscFunctionBegin; 23391a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2340de6a44a3SBarry Smith bs2 = bs*bs; 2341b6490206SBarry Smith 23422593348eSBarry Smith MPI_Comm_size(comm,&size); 2343e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 234490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 23450752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2346e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 23472593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 23482593348eSBarry Smith 2349e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 235035aab85fSBarry Smith 235135aab85fSBarry Smith /* 235235aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 235335aab85fSBarry Smith divisible by the blocksize 235435aab85fSBarry Smith */ 2355b6490206SBarry Smith mbs = M/bs; 235635aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 235735aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 235835aab85fSBarry Smith else mbs++; 235935aab85fSBarry Smith if (extra_rows) { 2360537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 236135aab85fSBarry Smith } 2362b6490206SBarry Smith 23632593348eSBarry Smith /* read in row lengths */ 236435aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 23650752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 236635aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 23672593348eSBarry Smith 2368b6490206SBarry Smith /* read in column indices */ 236935aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 23700752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 237135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2372b6490206SBarry Smith 2373b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2374b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2375b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 237635aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 237735aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 237835aab85fSBarry Smith masked = mask + mbs; 2379b6490206SBarry Smith rowcount = 0; nzcount = 0; 2380b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 238135aab85fSBarry Smith nmask = 0; 2382b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2383b6490206SBarry Smith kmax = rowlengths[rowcount]; 2384b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 238535aab85fSBarry Smith tmp = jj[nzcount++]/bs; 238635aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2387b6490206SBarry Smith } 2388b6490206SBarry Smith rowcount++; 2389b6490206SBarry Smith } 239035aab85fSBarry Smith browlengths[i] += nmask; 239135aab85fSBarry Smith /* zero out the mask elements we set */ 239235aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2393b6490206SBarry Smith } 2394b6490206SBarry Smith 23952593348eSBarry Smith /* create our matrix */ 23963eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 23972593348eSBarry Smith B = *A; 2398b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 23992593348eSBarry Smith 24002593348eSBarry Smith /* set matrix "i" values */ 2401de6a44a3SBarry Smith a->i[0] = 0; 2402b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2403b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2404b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 24052593348eSBarry Smith } 2406b6490206SBarry Smith a->nz = 0; 2407b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 24082593348eSBarry Smith 2409b6490206SBarry Smith /* read in nonzero values */ 241035aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 24110752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 241235aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2413b6490206SBarry Smith 2414b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2415b6490206SBarry Smith nzcount = 0; jcount = 0; 2416b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2417b6490206SBarry Smith nzcountb = nzcount; 241835aab85fSBarry Smith nmask = 0; 2419b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2420b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2421b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 242235aab85fSBarry Smith tmp = jj[nzcount++]/bs; 242335aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2424b6490206SBarry Smith } 2425b6490206SBarry Smith rowcount++; 2426b6490206SBarry Smith } 2427de6a44a3SBarry Smith /* sort the masked values */ 242877c4ece6SBarry Smith PetscSortInt(nmask,masked); 2429de6a44a3SBarry Smith 2430b6490206SBarry Smith /* set "j" values into matrix */ 2431b6490206SBarry Smith maskcount = 1; 243235aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 243335aab85fSBarry Smith a->j[jcount++] = masked[j]; 2434de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2435b6490206SBarry Smith } 2436b6490206SBarry Smith /* set "a" values into matrix */ 2437de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2438b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2439b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2440b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2441de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2442de6a44a3SBarry Smith block = mask[tmp] - 1; 2443de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2444de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2445b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2446b6490206SBarry Smith } 2447b6490206SBarry Smith } 244835aab85fSBarry Smith /* zero out the mask elements we set */ 244935aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2450b6490206SBarry Smith } 2451e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2452b6490206SBarry Smith 2453b6490206SBarry Smith PetscFree(rowlengths); 2454b6490206SBarry Smith PetscFree(browlengths); 2455b6490206SBarry Smith PetscFree(aa); 2456b6490206SBarry Smith PetscFree(jj); 2457b6490206SBarry Smith PetscFree(mask); 2458b6490206SBarry Smith 2459b6490206SBarry Smith B->assembled = PETSC_TRUE; 2460de6a44a3SBarry Smith 24619c01be13SBarry Smith ierr = MatView_Private(B); CHKERRQ(ierr); 2462*3a40ed3dSBarry Smith PetscFunctionReturn(0); 24632593348eSBarry Smith } 24642593348eSBarry Smith 24652593348eSBarry Smith 24662593348eSBarry Smith 2467