1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*0effe34fSLois Curfman McInnes static char vcid[] = "$Id: baij.c,v 1.110 1997/08/22 15:14:29 bsmith Exp curfman $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 93369ce9aSBarry Smith 103369ce9aSBarry Smith #include "pinclude/pviewer.h" 113369ce9aSBarry Smith #include "sys.h" 1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 131a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 141a6a6d98SBarry Smith #include "src/inline/spops.h" 152593348eSBarry Smith #include "petsc.h" 163b547af2SSatish Balay 172593348eSBarry Smith 18de6a44a3SBarry Smith /* 19de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 20de6a44a3SBarry Smith */ 21de6a44a3SBarry Smith 225615d1e5SSatish Balay #undef __FUNC__ 235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 25de6a44a3SBarry Smith { 26de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 277fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 28de6a44a3SBarry Smith 29de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 317fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 32de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33de6a44a3SBarry Smith if (a->j[j] == i) { 34de6a44a3SBarry Smith diag[i] = j; 35de6a44a3SBarry Smith break; 36de6a44a3SBarry Smith } 37de6a44a3SBarry Smith } 38de6a44a3SBarry Smith } 39de6a44a3SBarry Smith a->diag = diag; 40de6a44a3SBarry Smith return 0; 41de6a44a3SBarry Smith } 422593348eSBarry Smith 432593348eSBarry Smith 443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 453b2fbd54SBarry Smith 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 503b2fbd54SBarry Smith { 513b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 523b2fbd54SBarry Smith int ierr, n = a->mbs,i; 533b2fbd54SBarry Smith 543b2fbd54SBarry Smith *nn = n; 553b2fbd54SBarry Smith if (!ia) return 0; 563b2fbd54SBarry Smith if (symmetric) { 573b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 583b2fbd54SBarry Smith } else if (oshift == 1) { 593b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 603b2fbd54SBarry Smith int nz = a->i[n] + 1; 613b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 623b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 633b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 643b2fbd54SBarry Smith } else { 653b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 663b2fbd54SBarry Smith } 673b2fbd54SBarry Smith 683b2fbd54SBarry Smith return 0; 693b2fbd54SBarry Smith } 703b2fbd54SBarry Smith 715615d1e5SSatish Balay #undef __FUNC__ 72d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 743b2fbd54SBarry Smith PetscTruth *done) 753b2fbd54SBarry Smith { 763b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773b2fbd54SBarry Smith int i,n = a->mbs; 783b2fbd54SBarry Smith 793b2fbd54SBarry Smith if (!ia) return 0; 803b2fbd54SBarry Smith if (symmetric) { 813b2fbd54SBarry Smith PetscFree(*ia); 823b2fbd54SBarry Smith PetscFree(*ja); 83af5da2bfSBarry Smith } else if (oshift == 1) { 843b2fbd54SBarry Smith int nz = a->i[n]; 853b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 863b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 873b2fbd54SBarry Smith } 883b2fbd54SBarry Smith return 0; 893b2fbd54SBarry Smith } 903b2fbd54SBarry Smith 913b2fbd54SBarry Smith 925615d1e5SSatish Balay #undef __FUNC__ 93d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" 94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 952593348eSBarry Smith { 96b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 979df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98b6490206SBarry Smith Scalar *aa; 992593348eSBarry Smith 10090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1012593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1022593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1033b2fbd54SBarry Smith 1042593348eSBarry Smith col_lens[1] = a->m; 1052593348eSBarry Smith col_lens[2] = a->n; 1067e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1072593348eSBarry Smith 1082593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 109b6490206SBarry Smith count = 0; 110b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 111b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 112b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113b6490206SBarry Smith } 1142593348eSBarry Smith } 11577c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1162593348eSBarry Smith PetscFree(col_lens); 1172593348eSBarry Smith 1182593348eSBarry Smith /* store column indices (zero start index) */ 11966963ce1SSatish Balay jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120b6490206SBarry Smith count = 0; 121b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 122b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 123b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 125b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1262593348eSBarry Smith } 1272593348eSBarry Smith } 128b6490206SBarry Smith } 129b6490206SBarry Smith } 1307e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131b6490206SBarry Smith PetscFree(jj); 1322593348eSBarry Smith 1332593348eSBarry Smith /* store nonzero values */ 13466963ce1SSatish Balay aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135b6490206SBarry Smith count = 0; 136b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 137b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 138b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1407e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 141b6490206SBarry Smith } 142b6490206SBarry Smith } 143b6490206SBarry Smith } 144b6490206SBarry Smith } 1457e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146b6490206SBarry Smith PetscFree(aa); 1472593348eSBarry Smith return 0; 1482593348eSBarry Smith } 1492593348eSBarry Smith 1505615d1e5SSatish Balay #undef __FUNC__ 151d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1532593348eSBarry Smith { 154b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1559df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1562593348eSBarry Smith FILE *fd; 1572593348eSBarry Smith char *outputname; 1582593348eSBarry Smith 15990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1602593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 162639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1637ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1642593348eSBarry Smith } 165639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1672593348eSBarry Smith } 168639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 16944cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17044cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17144cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17244cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17344cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 175766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17744cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 18144cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18344cd7ae7SLois Curfman McInnes #else 18444cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18544cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18644cd7ae7SLois Curfman McInnes #endif 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19044cd7ae7SLois Curfman McInnes } 19144cd7ae7SLois Curfman McInnes } 19244cd7ae7SLois Curfman McInnes } 1932593348eSBarry Smith else { 194b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 195b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 196b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 197b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 200766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 20188685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2027e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 204766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207766eeae4SLois Curfman McInnes } 20888685aaeSLois Curfman McInnes else { 2097e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 21088685aaeSLois Curfman McInnes } 21188685aaeSLois Curfman McInnes #else 2127e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 21388685aaeSLois Curfman McInnes #endif 2142593348eSBarry Smith } 2152593348eSBarry Smith } 2162593348eSBarry Smith fprintf(fd,"\n"); 2172593348eSBarry Smith } 2182593348eSBarry Smith } 219b6490206SBarry Smith } 2202593348eSBarry Smith fflush(fd); 2212593348eSBarry Smith return 0; 2222593348eSBarry Smith } 2232593348eSBarry Smith 2245615d1e5SSatish Balay #undef __FUNC__ 225d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" 2263270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2273270192aSSatish Balay { 2283270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2293270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2303270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2313270192aSSatish Balay Scalar *aa; 2323270192aSSatish Balay Draw draw; 2333270192aSSatish Balay DrawButton button; 2343270192aSSatish Balay PetscTruth isnull; 2353270192aSSatish Balay 2363270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2373270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2383270192aSSatish Balay 2393270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2403270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2413270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2423270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2433270192aSSatish Balay color = DRAW_BLUE; 2443270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2453270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2463270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2473270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2483270192aSSatish Balay aa = a->a + j*bs2; 2493270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2503270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2513270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 252b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2533270192aSSatish Balay } 2543270192aSSatish Balay } 2553270192aSSatish Balay } 2563270192aSSatish Balay } 2573270192aSSatish Balay color = DRAW_CYAN; 2583270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2593270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2603270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2613270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2623270192aSSatish Balay aa = a->a + j*bs2; 2633270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2643270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2653270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 266b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2673270192aSSatish Balay } 2683270192aSSatish Balay } 2693270192aSSatish Balay } 2703270192aSSatish Balay } 2713270192aSSatish Balay 2723270192aSSatish Balay color = DRAW_RED; 2733270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2743270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2753270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2763270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2773270192aSSatish Balay aa = a->a + j*bs2; 2783270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2793270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2803270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 281b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2823270192aSSatish Balay } 2833270192aSSatish Balay } 2843270192aSSatish Balay } 2853270192aSSatish Balay } 2863270192aSSatish Balay 2873270192aSSatish Balay DrawFlush(draw); 2883270192aSSatish Balay DrawGetPause(draw,&pause); 2893270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2903270192aSSatish Balay 2913270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2923270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2933270192aSSatish Balay while (button != BUTTON_RIGHT) { 2943270192aSSatish Balay DrawClear(draw); 2953270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2963270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2973270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2983270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2993270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 3003270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 3013270192aSSatish Balay w *= scale; h *= scale; 3023270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 3033270192aSSatish Balay color = DRAW_BLUE; 3043270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3053270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3063270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3073270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3083270192aSSatish Balay aa = a->a + j*bs2; 3093270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3103270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3113270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 312b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3133270192aSSatish Balay } 3143270192aSSatish Balay } 3153270192aSSatish Balay } 3163270192aSSatish Balay } 3173270192aSSatish Balay color = DRAW_CYAN; 3183270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3193270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3203270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3213270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3223270192aSSatish Balay aa = a->a + j*bs2; 3233270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3243270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3253270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 326b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3273270192aSSatish Balay } 3283270192aSSatish Balay } 3293270192aSSatish Balay } 3303270192aSSatish Balay } 3313270192aSSatish Balay 3323270192aSSatish Balay color = DRAW_RED; 3333270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3343270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3353270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3363270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3373270192aSSatish Balay aa = a->a + j*bs2; 3383270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3393270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3403270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 341b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3423270192aSSatish Balay } 3433270192aSSatish Balay } 3443270192aSSatish Balay } 3453270192aSSatish Balay } 3463270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3473270192aSSatish Balay } 3483270192aSSatish Balay return 0; 3493270192aSSatish Balay } 3503270192aSSatish Balay 3515615d1e5SSatish Balay #undef __FUNC__ 352d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ" 3538f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3542593348eSBarry Smith { 3552593348eSBarry Smith Mat A = (Mat) obj; 35619bcc07fSBarry Smith ViewerType vtype; 35719bcc07fSBarry Smith int ierr; 3582593348eSBarry Smith 35919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36019bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 361e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 3622593348eSBarry Smith } 36319bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3652593348eSBarry Smith } 36619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 367b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3682593348eSBarry Smith } 36919bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3703270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3712593348eSBarry Smith } 3722593348eSBarry Smith return 0; 3732593348eSBarry Smith } 374b6490206SBarry Smith 375119a934aSSatish Balay #define CHUNKSIZE 10 376cd0e1443SSatish Balay 3775615d1e5SSatish Balay #undef __FUNC__ 3785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 379639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380cd0e1443SSatish Balay { 381cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3859df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 386cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 387cd0e1443SSatish Balay 388cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 389cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3903b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 391e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 392e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3933b2fbd54SBarry Smith #endif 3942c3acbe9SBarry Smith rp = aj + ai[brow]; 3952c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 3962c3acbe9SBarry Smith rmax = imax[brow]; 3972c3acbe9SBarry Smith nrow = ailen[brow]; 398cd0e1443SSatish Balay low = 0; 399cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 4003b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 401e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 4033b2fbd54SBarry Smith #endif 404a7c10996SSatish Balay col = in[l]; bcol = col/bs; 405cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 406cd0e1443SSatish Balay if (roworiented) { 407cd0e1443SSatish Balay value = *v++; 408cd0e1443SSatish Balay } 409cd0e1443SSatish Balay else { 410cd0e1443SSatish Balay value = v[k + l*m]; 411cd0e1443SSatish Balay } 412cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4132c3acbe9SBarry Smith while (high-low > 7) { 414cd0e1443SSatish Balay t = (low+high)/2; 415cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 416cd0e1443SSatish Balay else low = t; 417cd0e1443SSatish Balay } 418cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 419cd0e1443SSatish Balay if (rp[i] > bcol) break; 420cd0e1443SSatish Balay if (rp[i] == bcol) { 4217e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 422cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 423cd0e1443SSatish Balay else *bap = value; 424f1241b54SBarry Smith goto noinsert1; 425cd0e1443SSatish Balay } 426cd0e1443SSatish Balay } 427c2653b3dSLois Curfman McInnes if (nonew == 1) goto noinsert1; 42811ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429cd0e1443SSatish Balay if (nrow >= rmax) { 430cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 431cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432cd0e1443SSatish Balay Scalar *new_a; 433cd0e1443SSatish Balay 43411ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 43596854ed6SLois Curfman McInnes 43696854ed6SLois Curfman McInnes /* Malloc new storage space */ 4377e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4397e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 440cd0e1443SSatish Balay new_i = new_j + new_nz; 441cd0e1443SSatish Balay 442cd0e1443SSatish Balay /* copy over old data into new slots */ 443cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4447e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448cd0e1443SSatish Balay len*sizeof(int)); 449a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453cd0e1443SSatish Balay /* free up old matrix storage */ 454cd0e1443SSatish Balay PetscFree(a->a); 455cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457cd0e1443SSatish Balay a->singlemalloc = 1; 458cd0e1443SSatish Balay 459a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4617e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 46218e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 463cd0e1443SSatish Balay a->reallocs++; 464119a934aSSatish Balay a->nz++; 465cd0e1443SSatish Balay } 4667e67e3f9SSatish Balay N = nrow++ - 1; 467cd0e1443SSatish Balay /* shift up all the later entries in this row */ 468cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 469cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4707e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471cd0e1443SSatish Balay } 4727e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473cd0e1443SSatish Balay rp[i] = bcol; 4747e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 475f1241b54SBarry Smith noinsert1:; 476cd0e1443SSatish Balay low = i; 477cd0e1443SSatish Balay } 478cd0e1443SSatish Balay ailen[brow] = nrow; 479cd0e1443SSatish Balay } 480cd0e1443SSatish Balay return 0; 481cd0e1443SSatish Balay } 482cd0e1443SSatish Balay 4835615d1e5SSatish Balay #undef __FUNC__ 48405a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 48592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 48692c4ed94SBarry Smith { 48792c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4888a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489dd9472c6SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 490dd9472c6SBarry Smith int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 4910e324ae4SSatish Balay Scalar *ap,*value=v,*aa=a->a,*bap; 49292c4ed94SBarry Smith 4930e324ae4SSatish Balay if (roworiented) { 4940e324ae4SSatish Balay stepval = (n-1)*bs; 4950e324ae4SSatish Balay } else { 4960e324ae4SSatish Balay stepval = (m-1)*bs; 4970e324ae4SSatish Balay } 49892c4ed94SBarry Smith for ( k=0; k<m; k++ ) { /* loop over added rows */ 49992c4ed94SBarry Smith row = im[k]; 50092c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 50192c4ed94SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 5028a84c255SSatish Balay if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 50392c4ed94SBarry Smith #endif 50492c4ed94SBarry Smith rp = aj + ai[row]; 50592c4ed94SBarry Smith ap = aa + bs2*ai[row]; 50692c4ed94SBarry Smith rmax = imax[row]; 50792c4ed94SBarry Smith nrow = ailen[row]; 50892c4ed94SBarry Smith low = 0; 50992c4ed94SBarry Smith for ( l=0; l<n; l++ ) { /* loop over added columns */ 51092c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 51192c4ed94SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 5128a84c255SSatish Balay if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 51392c4ed94SBarry Smith #endif 51492c4ed94SBarry Smith col = in[l]; 51592c4ed94SBarry Smith if (roworiented) { 5160e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 5170e324ae4SSatish Balay } else { 5180e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 51992c4ed94SBarry Smith } 52092c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 52192c4ed94SBarry Smith while (high-low > 7) { 52292c4ed94SBarry Smith t = (low+high)/2; 52392c4ed94SBarry Smith if (rp[t] > col) high = t; 52492c4ed94SBarry Smith else low = t; 52592c4ed94SBarry Smith } 52692c4ed94SBarry Smith for ( i=low; i<high; i++ ) { 52792c4ed94SBarry Smith if (rp[i] > col) break; 52892c4ed94SBarry Smith if (rp[i] == col) { 5298a84c255SSatish Balay bap = ap + bs2*i; 5300e324ae4SSatish Balay if (roworiented) { 5318a84c255SSatish Balay if (is == ADD_VALUES) { 532dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 533dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5348a84c255SSatish Balay bap[jj] += *value++; 535dd9472c6SBarry Smith } 536dd9472c6SBarry Smith } 5370e324ae4SSatish Balay } else { 538dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 539dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5400e324ae4SSatish Balay bap[jj] = *value++; 5418a84c255SSatish Balay } 542dd9472c6SBarry Smith } 543dd9472c6SBarry Smith } 5440e324ae4SSatish Balay } else { 5450e324ae4SSatish Balay if (is == ADD_VALUES) { 546dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 547dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5480e324ae4SSatish Balay *bap++ += *value++; 549dd9472c6SBarry Smith } 550dd9472c6SBarry Smith } 5510e324ae4SSatish Balay } else { 552dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 553dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5540e324ae4SSatish Balay *bap++ = *value++; 5550e324ae4SSatish Balay } 5568a84c255SSatish Balay } 557dd9472c6SBarry Smith } 558dd9472c6SBarry Smith } 559f1241b54SBarry Smith goto noinsert2; 56092c4ed94SBarry Smith } 56192c4ed94SBarry Smith } 56289280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 56311ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 56492c4ed94SBarry Smith if (nrow >= rmax) { 56592c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 56692c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 56792c4ed94SBarry Smith Scalar *new_a; 56892c4ed94SBarry Smith 56911ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 57089280ab3SLois Curfman McInnes 57192c4ed94SBarry Smith /* malloc new storage space */ 57292c4ed94SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 57392c4ed94SBarry Smith new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 57492c4ed94SBarry Smith new_j = (int *) (new_a + bs2*new_nz); 57592c4ed94SBarry Smith new_i = new_j + new_nz; 57692c4ed94SBarry Smith 57792c4ed94SBarry Smith /* copy over old data into new slots */ 57892c4ed94SBarry Smith for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 57992c4ed94SBarry Smith for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 58092c4ed94SBarry Smith PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 58192c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 582dd9472c6SBarry Smith PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 58392c4ed94SBarry Smith PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 58492c4ed94SBarry Smith PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 585dd9472c6SBarry Smith PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 58692c4ed94SBarry Smith /* free up old matrix storage */ 58792c4ed94SBarry Smith PetscFree(a->a); 58892c4ed94SBarry Smith if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 58992c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 59092c4ed94SBarry Smith a->singlemalloc = 1; 59192c4ed94SBarry Smith 59292c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 59392c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 59492c4ed94SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 59592c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 59692c4ed94SBarry Smith a->reallocs++; 59792c4ed94SBarry Smith a->nz++; 59892c4ed94SBarry Smith } 59992c4ed94SBarry Smith N = nrow++ - 1; 60092c4ed94SBarry Smith /* shift up all the later entries in this row */ 60192c4ed94SBarry Smith for ( ii=N; ii>=i; ii-- ) { 60292c4ed94SBarry Smith rp[ii+1] = rp[ii]; 60392c4ed94SBarry Smith PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 60492c4ed94SBarry Smith } 60592c4ed94SBarry Smith if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 60692c4ed94SBarry Smith rp[i] = col; 6078a84c255SSatish Balay bap = ap + bs2*i; 6080e324ae4SSatish Balay if (roworiented) { 609dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval) { 610dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 6110e324ae4SSatish Balay bap[jj] = *value++; 612dd9472c6SBarry Smith } 613dd9472c6SBarry Smith } 6140e324ae4SSatish Balay } else { 615dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 616dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 6170e324ae4SSatish Balay *bap++ = *value++; 6180e324ae4SSatish Balay } 619dd9472c6SBarry Smith } 620dd9472c6SBarry Smith } 621f1241b54SBarry Smith noinsert2:; 62292c4ed94SBarry Smith low = i; 62392c4ed94SBarry Smith } 62492c4ed94SBarry Smith ailen[row] = nrow; 62592c4ed94SBarry Smith } 62692c4ed94SBarry Smith return 0; 62792c4ed94SBarry Smith } 62892c4ed94SBarry Smith 62992c4ed94SBarry Smith #undef __FUNC__ 630d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" 6318f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 6320b824a48SSatish Balay { 6330b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6340b824a48SSatish Balay *m = a->m; *n = a->n; 6350b824a48SSatish Balay return 0; 6360b824a48SSatish Balay } 6370b824a48SSatish Balay 6385615d1e5SSatish Balay #undef __FUNC__ 639d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 6408f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 6410b824a48SSatish Balay { 6420b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6430b824a48SSatish Balay *m = 0; *n = a->m; 6440b824a48SSatish Balay return 0; 6450b824a48SSatish Balay } 6460b824a48SSatish Balay 6475615d1e5SSatish Balay #undef __FUNC__ 6485615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 6499867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6509867e207SSatish Balay { 6519867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6527e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 6539867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 6549867e207SSatish Balay 6559867e207SSatish Balay bs = a->bs; 6569867e207SSatish Balay ai = a->i; 6579867e207SSatish Balay aj = a->j; 6589867e207SSatish Balay aa = a->a; 6599df24120SSatish Balay bs2 = a->bs2; 6609867e207SSatish Balay 661e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 6629867e207SSatish Balay 6639867e207SSatish Balay bn = row/bs; /* Block number */ 6649867e207SSatish Balay bp = row % bs; /* Block Position */ 6659867e207SSatish Balay M = ai[bn+1] - ai[bn]; 6669867e207SSatish Balay *nz = bs*M; 6679867e207SSatish Balay 6689867e207SSatish Balay if (v) { 6699867e207SSatish Balay *v = 0; 6709867e207SSatish Balay if (*nz) { 6719867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 6729867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6739867e207SSatish Balay v_i = *v + i*bs; 6747e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 6757e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 6769867e207SSatish Balay } 6779867e207SSatish Balay } 6789867e207SSatish Balay } 6799867e207SSatish Balay 6809867e207SSatish Balay if (idx) { 6819867e207SSatish Balay *idx = 0; 6829867e207SSatish Balay if (*nz) { 6839867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 6849867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6859867e207SSatish Balay idx_i = *idx + i*bs; 6869867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 6879867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 6889867e207SSatish Balay } 6899867e207SSatish Balay } 6909867e207SSatish Balay } 6919867e207SSatish Balay return 0; 6929867e207SSatish Balay } 6939867e207SSatish Balay 6945615d1e5SSatish Balay #undef __FUNC__ 695d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" 6969867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6979867e207SSatish Balay { 6989867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 6999867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 7009867e207SSatish Balay return 0; 7019867e207SSatish Balay } 702b6490206SBarry Smith 7035615d1e5SSatish Balay #undef __FUNC__ 7045615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 7058f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B) 706f2501298SSatish Balay { 707f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 708f2501298SSatish Balay Mat C; 709f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 7109df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 711f2501298SSatish Balay Scalar *array=a->a; 712f2501298SSatish Balay 713f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 714e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 715f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 716f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 717a7c10996SSatish Balay 718a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 719f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 720f2501298SSatish Balay PetscFree(col); 721f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 722f2501298SSatish Balay cols = rows + bs; 723f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 724f2501298SSatish Balay cols[0] = i*bs; 725f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 726f2501298SSatish Balay len = ai[i+1] - ai[i]; 727f2501298SSatish Balay for ( j=0; j<len; j++ ) { 728f2501298SSatish Balay rows[0] = (*aj++)*bs; 729f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 730f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 731f2501298SSatish Balay array += bs2; 732f2501298SSatish Balay } 733f2501298SSatish Balay } 7341073c447SSatish Balay PetscFree(rows); 735f2501298SSatish Balay 7366d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7376d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 738f2501298SSatish Balay 739f2501298SSatish Balay if (B != PETSC_NULL) { 740f2501298SSatish Balay *B = C; 741f2501298SSatish Balay } else { 742f2501298SSatish Balay /* This isn't really an in-place transpose */ 743f2501298SSatish Balay PetscFree(a->a); 744f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 745f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 746f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 747f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 748f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 749f2501298SSatish Balay PetscFree(a); 750f09e8eb9SSatish Balay PetscMemcpy(A,C,sizeof(struct _p_Mat)); 751f2501298SSatish Balay PetscHeaderDestroy(C); 752f2501298SSatish Balay } 753f2501298SSatish Balay return 0; 754f2501298SSatish Balay } 755f2501298SSatish Balay 756f2501298SSatish Balay 7575615d1e5SSatish Balay #undef __FUNC__ 7585615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 7598f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 760584200bdSSatish Balay { 761584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 762584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 763a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 76443ee02c3SBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 765584200bdSSatish Balay Scalar *aa = a->a, *ap; 766584200bdSSatish Balay 7676d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 768584200bdSSatish Balay 76943ee02c3SBarry Smith if (m) rmax = ailen[0]; 770584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 771584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 772584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 773d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 774584200bdSSatish Balay if (fshift) { 775a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 776584200bdSSatish Balay N = ailen[i]; 777584200bdSSatish Balay for ( j=0; j<N; j++ ) { 778584200bdSSatish Balay ip[j-fshift] = ip[j]; 7797e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 780584200bdSSatish Balay } 781584200bdSSatish Balay } 782584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 783584200bdSSatish Balay } 784584200bdSSatish Balay if (mbs) { 785584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 786584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 787584200bdSSatish Balay } 788584200bdSSatish Balay /* reset ilen and imax for each row */ 789584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 790584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 791584200bdSSatish Balay } 792a7c10996SSatish Balay a->nz = ai[mbs]; 793584200bdSSatish Balay 794584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 795584200bdSSatish Balay if (fshift && a->diag) { 796584200bdSSatish Balay PetscFree(a->diag); 797584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 798584200bdSSatish Balay a->diag = 0; 799584200bdSSatish Balay } 8003d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 801219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 8023d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 803584200bdSSatish Balay a->reallocs); 804d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 805e2f3b5e9SSatish Balay a->reallocs = 0; 8064e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 8074e220ebcSLois Curfman McInnes 808584200bdSSatish Balay return 0; 809584200bdSSatish Balay } 810584200bdSSatish Balay 8115615d1e5SSatish Balay #undef __FUNC__ 8125615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 8138f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A) 8142593348eSBarry Smith { 815b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8169df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 8172593348eSBarry Smith return 0; 8182593348eSBarry Smith } 8192593348eSBarry Smith 8205615d1e5SSatish Balay #undef __FUNC__ 821d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" 822b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 8232593348eSBarry Smith { 8242593348eSBarry Smith Mat A = (Mat) obj; 825b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8262593348eSBarry Smith 8272593348eSBarry Smith #if defined(PETSC_LOG) 8282593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 8292593348eSBarry Smith #endif 8302593348eSBarry Smith PetscFree(a->a); 8312593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 8322593348eSBarry Smith if (a->diag) PetscFree(a->diag); 8332593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 8342593348eSBarry Smith if (a->imax) PetscFree(a->imax); 8352593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 836de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 8372593348eSBarry Smith PetscFree(a); 838f2655603SLois Curfman McInnes PLogObjectDestroy(A); 839f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 8402593348eSBarry Smith return 0; 8412593348eSBarry Smith } 8422593348eSBarry Smith 8435615d1e5SSatish Balay #undef __FUNC__ 844d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" 8458f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op) 8462593348eSBarry Smith { 847b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8486d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 8496d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 8506d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 851219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 8526d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 853c2653b3dSLois Curfman McInnes else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 85496854ed6SLois Curfman McInnes else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 8556d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 8566d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 857219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 8586d4a8577SBarry Smith op == MAT_SYMMETRIC || 8596d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 86090f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 8612b362799SSatish Balay op == MAT_IGNORE_OFF_PROC_ENTRIES) 86294a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 8636d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 864e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 8652593348eSBarry Smith else 866e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 8672593348eSBarry Smith return 0; 8682593348eSBarry Smith } 8692593348eSBarry Smith 8702593348eSBarry Smith 8712593348eSBarry Smith /* -------------------------------------------------------*/ 8722593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 8732593348eSBarry Smith /* -------------------------------------------------------*/ 874b6490206SBarry Smith #include "pinclude/plapack.h" 875b6490206SBarry Smith 8765615d1e5SSatish Balay #undef __FUNC__ 8775615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 87839b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 8792593348eSBarry Smith { 880b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 88139b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 882c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 8832593348eSBarry Smith 884c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 885c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 886b6490206SBarry Smith 887119a934aSSatish Balay idx = a->j; 888119a934aSSatish Balay v = a->a; 889119a934aSSatish Balay ii = a->i; 890119a934aSSatish Balay 891119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 892119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 893119a934aSSatish Balay sum = 0.0; 894119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 8951a6a6d98SBarry Smith z[i] = sum; 896119a934aSSatish Balay } 897c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 898c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 89939b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 90039b95ed1SBarry Smith return 0; 90139b95ed1SBarry Smith } 90239b95ed1SBarry Smith 9035615d1e5SSatish Balay #undef __FUNC__ 9045615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 90539b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 90639b95ed1SBarry Smith { 90739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 90839b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 90939b95ed1SBarry Smith register Scalar x1,x2; 91039b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 911c16cb8f2SBarry Smith int j,n; 91239b95ed1SBarry Smith 913c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 914c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 91539b95ed1SBarry Smith 91639b95ed1SBarry Smith idx = a->j; 91739b95ed1SBarry Smith v = a->a; 91839b95ed1SBarry Smith ii = a->i; 91939b95ed1SBarry Smith 920119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 921119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 922119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 923119a934aSSatish Balay for ( j=0; j<n; j++ ) { 924119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 925119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 926119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 927119a934aSSatish Balay v += 4; 928119a934aSSatish Balay } 9291a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 930119a934aSSatish Balay z += 2; 931119a934aSSatish Balay } 932c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 933c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 93439b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 93539b95ed1SBarry Smith return 0; 93639b95ed1SBarry Smith } 93739b95ed1SBarry Smith 9385615d1e5SSatish Balay #undef __FUNC__ 9395615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 94039b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 94139b95ed1SBarry Smith { 94239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 94339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 944c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 94539b95ed1SBarry Smith 946c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 947c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 94839b95ed1SBarry Smith 94939b95ed1SBarry Smith idx = a->j; 95039b95ed1SBarry Smith v = a->a; 95139b95ed1SBarry Smith ii = a->i; 95239b95ed1SBarry Smith 953119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 954119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 955119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 956119a934aSSatish Balay for ( j=0; j<n; j++ ) { 957119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 958119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 959119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 960119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 961119a934aSSatish Balay v += 9; 962119a934aSSatish Balay } 9631a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 964119a934aSSatish Balay z += 3; 965119a934aSSatish Balay } 966c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 967c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 96839b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 96939b95ed1SBarry Smith return 0; 97039b95ed1SBarry Smith } 97139b95ed1SBarry Smith 9725615d1e5SSatish Balay #undef __FUNC__ 9735615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 97439b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 97539b95ed1SBarry Smith { 97639b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97739b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 97839b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 97939b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 980c16cb8f2SBarry Smith int j,n; 98139b95ed1SBarry Smith 982c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 983c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 98439b95ed1SBarry Smith 98539b95ed1SBarry Smith idx = a->j; 98639b95ed1SBarry Smith v = a->a; 98739b95ed1SBarry Smith ii = a->i; 98839b95ed1SBarry Smith 989119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 990119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 991119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 992119a934aSSatish Balay for ( j=0; j<n; j++ ) { 993119a934aSSatish Balay xb = x + 4*(*idx++); 994119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 995119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 996119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 997119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 998119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 999119a934aSSatish Balay v += 16; 1000119a934aSSatish Balay } 10011a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1002119a934aSSatish Balay z += 4; 1003119a934aSSatish Balay } 1004c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1005c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 100639b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 100739b95ed1SBarry Smith return 0; 100839b95ed1SBarry Smith } 100939b95ed1SBarry Smith 10105615d1e5SSatish Balay #undef __FUNC__ 10115615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 101239b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 101339b95ed1SBarry Smith { 101439b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101539b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 101639b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 1017c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 101839b95ed1SBarry Smith 1019c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1020c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 102139b95ed1SBarry Smith 102239b95ed1SBarry Smith idx = a->j; 102339b95ed1SBarry Smith v = a->a; 102439b95ed1SBarry Smith ii = a->i; 102539b95ed1SBarry Smith 1026119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1027119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1028119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1029119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1030119a934aSSatish Balay xb = x + 5*(*idx++); 1031119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1032119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1033119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1034119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1035119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1036119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1037119a934aSSatish Balay v += 25; 1038119a934aSSatish Balay } 10391a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1040119a934aSSatish Balay z += 5; 1041119a934aSSatish Balay } 1042c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1043c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 104439b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 104539b95ed1SBarry Smith return 0; 104639b95ed1SBarry Smith } 104739b95ed1SBarry Smith 10485615d1e5SSatish Balay #undef __FUNC__ 104948e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7" 105048e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 105148e9ddb2SSatish Balay { 105248e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 105348e9ddb2SSatish Balay register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 105448e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 105548e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 105648e9ddb2SSatish Balay 105748e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 105848e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 105948e9ddb2SSatish Balay 106048e9ddb2SSatish Balay idx = a->j; 106148e9ddb2SSatish Balay v = a->a; 106248e9ddb2SSatish Balay ii = a->i; 106348e9ddb2SSatish Balay 106448e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 106548e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 106648e9ddb2SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 106748e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 106848e9ddb2SSatish Balay xb = x + 7*(*idx++); 106948e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 107048e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 107148e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 107248e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 107348e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 107448e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 107548e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 107648e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 107748e9ddb2SSatish Balay v += 49; 107848e9ddb2SSatish Balay } 107948e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 108048e9ddb2SSatish Balay z += 7; 108148e9ddb2SSatish Balay } 108248e9ddb2SSatish Balay 108348e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 108448e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 108548e9ddb2SSatish Balay PLogFlops(98*a->nz - a->m); 108648e9ddb2SSatish Balay return 0; 108748e9ddb2SSatish Balay } 108848e9ddb2SSatish Balay 108948e9ddb2SSatish Balay #undef __FUNC__ 10905615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 109139b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 109239b95ed1SBarry Smith { 109339b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 109439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 1095c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1096c16cb8f2SBarry Smith int _One = 1,ncols,k; 1097c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 109839b95ed1SBarry Smith 1099c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1100c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 110139b95ed1SBarry Smith 110239b95ed1SBarry Smith idx = a->j; 110339b95ed1SBarry Smith v = a->a; 110439b95ed1SBarry Smith ii = a->i; 110539b95ed1SBarry Smith 110639b95ed1SBarry Smith 1107119a934aSSatish Balay if (!a->mult_work) { 11083b547af2SSatish Balay k = PetscMax(a->m,a->n); 11092b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1110119a934aSSatish Balay } 1111119a934aSSatish Balay work = a->mult_work; 1112119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1113119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1114119a934aSSatish Balay ncols = n*bs; 1115119a934aSSatish Balay workt = work; 1116119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1117119a934aSSatish Balay xb = x + bs*(*idx++); 1118119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1119119a934aSSatish Balay workt += bs; 1120119a934aSSatish Balay } 11211a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1122119a934aSSatish Balay v += n*bs2; 1123119a934aSSatish Balay z += bs; 1124119a934aSSatish Balay } 1125c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1126c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 11271a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 1128bb42667fSSatish Balay return 0; 1129bb42667fSSatish Balay } 1130bb42667fSSatish Balay 11315615d1e5SSatish Balay #undef __FUNC__ 11325615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1133f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1134f44d3a6dSSatish Balay { 1135f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1136f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 1137c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 1138f44d3a6dSSatish Balay 1139c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1140c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1141c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1142f44d3a6dSSatish Balay 1143f44d3a6dSSatish Balay idx = a->j; 1144f44d3a6dSSatish Balay v = a->a; 1145f44d3a6dSSatish Balay ii = a->i; 1146f44d3a6dSSatish Balay 1147f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1148f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1149f44d3a6dSSatish Balay sum = y[i]; 1150f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 1151f44d3a6dSSatish Balay z[i] = sum; 1152f44d3a6dSSatish Balay } 1153c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1154c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1155c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1156f44d3a6dSSatish Balay PLogFlops(2*a->nz); 1157f44d3a6dSSatish Balay return 0; 1158f44d3a6dSSatish Balay } 1159f44d3a6dSSatish Balay 11605615d1e5SSatish Balay #undef __FUNC__ 11615615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1162f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1163f44d3a6dSSatish Balay { 1164f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1165f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1166f44d3a6dSSatish Balay register Scalar x1,x2; 1167f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1168c16cb8f2SBarry Smith int j,n; 1169f44d3a6dSSatish Balay 1170c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1171c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1172c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1173f44d3a6dSSatish Balay 1174f44d3a6dSSatish Balay idx = a->j; 1175f44d3a6dSSatish Balay v = a->a; 1176f44d3a6dSSatish Balay ii = a->i; 1177f44d3a6dSSatish Balay 1178f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1179f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1180f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 1181f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1182f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1183f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 1184f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 1185f44d3a6dSSatish Balay v += 4; 1186f44d3a6dSSatish Balay } 1187f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 1188f44d3a6dSSatish Balay z += 2; y += 2; 1189f44d3a6dSSatish Balay } 1190c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1191c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1192c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1193f44d3a6dSSatish Balay PLogFlops(4*a->nz); 1194f44d3a6dSSatish Balay return 0; 1195f44d3a6dSSatish Balay } 1196f44d3a6dSSatish Balay 11975615d1e5SSatish Balay #undef __FUNC__ 11985615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1199f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1200f44d3a6dSSatish Balay { 1201f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1202f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1203c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1204f44d3a6dSSatish Balay 1205c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1206c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1207c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1208f44d3a6dSSatish Balay 1209f44d3a6dSSatish Balay idx = a->j; 1210f44d3a6dSSatish Balay v = a->a; 1211f44d3a6dSSatish Balay ii = a->i; 1212f44d3a6dSSatish Balay 1213f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1214f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1215f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1216f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1217f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1218f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1219f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1220f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1221f44d3a6dSSatish Balay v += 9; 1222f44d3a6dSSatish Balay } 1223f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1224f44d3a6dSSatish Balay z += 3; y += 3; 1225f44d3a6dSSatish Balay } 1226c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1227c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1228c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1229f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1230f44d3a6dSSatish Balay return 0; 1231f44d3a6dSSatish Balay } 1232f44d3a6dSSatish Balay 12335615d1e5SSatish Balay #undef __FUNC__ 12345615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1235f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1236f44d3a6dSSatish Balay { 1237f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1238f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1239f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 1240f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1241c16cb8f2SBarry Smith int j,n; 1242f44d3a6dSSatish Balay 1243c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1244c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1245c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1246f44d3a6dSSatish Balay 1247f44d3a6dSSatish Balay idx = a->j; 1248f44d3a6dSSatish Balay v = a->a; 1249f44d3a6dSSatish Balay ii = a->i; 1250f44d3a6dSSatish Balay 1251f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1252f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1253f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1254f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1255f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1256f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1257f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1258f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1259f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1260f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1261f44d3a6dSSatish Balay v += 16; 1262f44d3a6dSSatish Balay } 1263f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1264f44d3a6dSSatish Balay z += 4; y += 4; 1265f44d3a6dSSatish Balay } 1266c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1267c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1268c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1269f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1270f44d3a6dSSatish Balay return 0; 1271f44d3a6dSSatish Balay } 1272f44d3a6dSSatish Balay 12735615d1e5SSatish Balay #undef __FUNC__ 12745615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1275f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1276f44d3a6dSSatish Balay { 1277f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1278f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1279f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1280c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1281f44d3a6dSSatish Balay 1282c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1283c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1284c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1285f44d3a6dSSatish Balay 1286f44d3a6dSSatish Balay idx = a->j; 1287f44d3a6dSSatish Balay v = a->a; 1288f44d3a6dSSatish Balay ii = a->i; 1289f44d3a6dSSatish Balay 1290f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1291f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1292f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1293f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1294f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1295f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1296f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1297f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1298f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1299f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1300f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1301f44d3a6dSSatish Balay v += 25; 1302f44d3a6dSSatish Balay } 1303f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1304f44d3a6dSSatish Balay z += 5; y += 5; 1305f44d3a6dSSatish Balay } 1306c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1307c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1308c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1309f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1310f44d3a6dSSatish Balay return 0; 1311f44d3a6dSSatish Balay } 1312f44d3a6dSSatish Balay 13135615d1e5SSatish Balay #undef __FUNC__ 131448e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 131548e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 131648e9ddb2SSatish Balay { 131748e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 131848e9ddb2SSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 131948e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 132048e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 132148e9ddb2SSatish Balay 132248e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 132348e9ddb2SSatish Balay VecGetArray_Fast(yy,y); 132448e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 132548e9ddb2SSatish Balay 132648e9ddb2SSatish Balay idx = a->j; 132748e9ddb2SSatish Balay v = a->a; 132848e9ddb2SSatish Balay ii = a->i; 132948e9ddb2SSatish Balay 133048e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 133148e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 133248e9ddb2SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 133348e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 133448e9ddb2SSatish Balay xb = x + 7*(*idx++); 133548e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 133648e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 133748e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 133848e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 133948e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 134048e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 134148e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 134248e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 134348e9ddb2SSatish Balay v += 49; 134448e9ddb2SSatish Balay } 134548e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 134648e9ddb2SSatish Balay z += 7; y += 7; 134748e9ddb2SSatish Balay } 134848e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 134948e9ddb2SSatish Balay VecRestoreArray_Fast(yy,y); 135048e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 135148e9ddb2SSatish Balay PLogFlops(98*a->nz); 135248e9ddb2SSatish Balay return 0; 135348e9ddb2SSatish Balay } 135448e9ddb2SSatish Balay 135548e9ddb2SSatish Balay 135648e9ddb2SSatish Balay #undef __FUNC__ 13575615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1358f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1359f44d3a6dSSatish Balay { 1360f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1361f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1362f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1363f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1364f44d3a6dSSatish Balay 1365f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1366f44d3a6dSSatish Balay 1367c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1368c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1369f44d3a6dSSatish Balay 1370f44d3a6dSSatish Balay idx = a->j; 1371f44d3a6dSSatish Balay v = a->a; 1372f44d3a6dSSatish Balay ii = a->i; 1373f44d3a6dSSatish Balay 1374f44d3a6dSSatish Balay 1375f44d3a6dSSatish Balay if (!a->mult_work) { 1376f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1377f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1378f44d3a6dSSatish Balay } 1379f44d3a6dSSatish Balay work = a->mult_work; 1380f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1381f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1382f44d3a6dSSatish Balay ncols = n*bs; 1383f44d3a6dSSatish Balay workt = work; 1384f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1385f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1386f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1387f44d3a6dSSatish Balay workt += bs; 1388f44d3a6dSSatish Balay } 1389f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1390f44d3a6dSSatish Balay v += n*bs2; 1391f44d3a6dSSatish Balay z += bs; 1392f44d3a6dSSatish Balay } 1393c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1394c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1395f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1396f44d3a6dSSatish Balay return 0; 1397f44d3a6dSSatish Balay } 1398f44d3a6dSSatish Balay 13995615d1e5SSatish Balay #undef __FUNC__ 14005615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 14018f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1402bb42667fSSatish Balay { 1403bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14041a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1405bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1406bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 14079df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1408119a934aSSatish Balay 1409119a934aSSatish Balay 141090f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 141190f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1412bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1413bb42667fSSatish Balay 1414119a934aSSatish Balay idx = a->j; 1415119a934aSSatish Balay v = a->a; 1416119a934aSSatish Balay ii = a->i; 1417119a934aSSatish Balay 1418119a934aSSatish Balay switch (bs) { 1419119a934aSSatish Balay case 1: 1420119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1421119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1422119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1423119a934aSSatish Balay ib = idx + ai[i]; 1424119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1425bb1453f0SSatish Balay rval = ib[j]; 1426bb1453f0SSatish Balay z[rval] += *v++ * x1; 1427119a934aSSatish Balay } 1428119a934aSSatish Balay } 1429119a934aSSatish Balay break; 1430119a934aSSatish Balay case 2: 1431119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1432119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1433119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1434119a934aSSatish Balay ib = idx + ai[i]; 1435119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1436119a934aSSatish Balay rval = ib[j]*2; 1437bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1438bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1439119a934aSSatish Balay v += 4; 1440119a934aSSatish Balay } 1441119a934aSSatish Balay } 1442119a934aSSatish Balay break; 1443119a934aSSatish Balay case 3: 1444119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1445119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1446119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1447119a934aSSatish Balay ib = idx + ai[i]; 1448119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1449119a934aSSatish Balay rval = ib[j]*3; 1450bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1451bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1452bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1453119a934aSSatish Balay v += 9; 1454119a934aSSatish Balay } 1455119a934aSSatish Balay } 1456119a934aSSatish Balay break; 1457119a934aSSatish Balay case 4: 1458119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1459119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1460119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1461119a934aSSatish Balay ib = idx + ai[i]; 1462119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1463119a934aSSatish Balay rval = ib[j]*4; 1464bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1465bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1466bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1467bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1468119a934aSSatish Balay v += 16; 1469119a934aSSatish Balay } 1470119a934aSSatish Balay } 1471119a934aSSatish Balay break; 1472119a934aSSatish Balay case 5: 1473119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1474119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1475119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1476119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1477119a934aSSatish Balay ib = idx + ai[i]; 1478119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1479119a934aSSatish Balay rval = ib[j]*5; 1480bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1481bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1482bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1483bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1484bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1485119a934aSSatish Balay v += 25; 1486119a934aSSatish Balay } 1487119a934aSSatish Balay } 1488119a934aSSatish Balay break; 1489119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1490119a934aSSatish Balay default: { 1491119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1492119a934aSSatish Balay if (!a->mult_work) { 14933b547af2SSatish Balay k = PetscMax(a->m,a->n); 1494bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1495119a934aSSatish Balay CHKPTRQ(a->mult_work); 1496119a934aSSatish Balay } 1497119a934aSSatish Balay work = a->mult_work; 1498119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1499119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1500119a934aSSatish Balay ncols = n*bs; 1501119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1502119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1503119a934aSSatish Balay v += n*bs2; 1504119a934aSSatish Balay x += bs; 1505119a934aSSatish Balay workt = work; 1506119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1507119a934aSSatish Balay zb = z + bs*(*idx++); 1508bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1509119a934aSSatish Balay workt += bs; 1510119a934aSSatish Balay } 1511119a934aSSatish Balay } 1512119a934aSSatish Balay } 1513119a934aSSatish Balay } 15140419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 15150419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1516faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1517faf6580fSSatish Balay return 0; 1518faf6580fSSatish Balay } 15191c351548SSatish Balay 15205615d1e5SSatish Balay #undef __FUNC__ 15215615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 15228f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1523faf6580fSSatish Balay { 1524faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1525faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1526faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1527faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1528faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1529faf6580fSSatish Balay 1530faf6580fSSatish Balay 1531faf6580fSSatish Balay 153290f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 153390f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1534faf6580fSSatish Balay 1535648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1536648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1537648c1d95SSatish Balay 1538faf6580fSSatish Balay 1539faf6580fSSatish Balay idx = a->j; 1540faf6580fSSatish Balay v = a->a; 1541faf6580fSSatish Balay ii = a->i; 1542faf6580fSSatish Balay 1543faf6580fSSatish Balay switch (bs) { 1544faf6580fSSatish Balay case 1: 1545faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1546faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1547faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1548faf6580fSSatish Balay ib = idx + ai[i]; 1549faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1550faf6580fSSatish Balay rval = ib[j]; 1551faf6580fSSatish Balay z[rval] += *v++ * x1; 1552faf6580fSSatish Balay } 1553faf6580fSSatish Balay } 1554faf6580fSSatish Balay break; 1555faf6580fSSatish Balay case 2: 1556faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1557faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1558faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1559faf6580fSSatish Balay ib = idx + ai[i]; 1560faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1561faf6580fSSatish Balay rval = ib[j]*2; 1562faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1563faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1564faf6580fSSatish Balay v += 4; 1565faf6580fSSatish Balay } 1566faf6580fSSatish Balay } 1567faf6580fSSatish Balay break; 1568faf6580fSSatish Balay case 3: 1569faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1570faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1571faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1572faf6580fSSatish Balay ib = idx + ai[i]; 1573faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1574faf6580fSSatish Balay rval = ib[j]*3; 1575faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1576faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1577faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1578faf6580fSSatish Balay v += 9; 1579faf6580fSSatish Balay } 1580faf6580fSSatish Balay } 1581faf6580fSSatish Balay break; 1582faf6580fSSatish Balay case 4: 1583faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1584faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1585faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1586faf6580fSSatish Balay ib = idx + ai[i]; 1587faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1588faf6580fSSatish Balay rval = ib[j]*4; 1589faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1590faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1591faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1592faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1593faf6580fSSatish Balay v += 16; 1594faf6580fSSatish Balay } 1595faf6580fSSatish Balay } 1596faf6580fSSatish Balay break; 1597faf6580fSSatish Balay case 5: 1598faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1599faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1600faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1601faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1602faf6580fSSatish Balay ib = idx + ai[i]; 1603faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1604faf6580fSSatish Balay rval = ib[j]*5; 1605faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1606faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1607faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1608faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1609faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1610faf6580fSSatish Balay v += 25; 1611faf6580fSSatish Balay } 1612faf6580fSSatish Balay } 1613faf6580fSSatish Balay break; 1614faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1615faf6580fSSatish Balay default: { 1616faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1617faf6580fSSatish Balay if (!a->mult_work) { 1618faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1619faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1620faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1621faf6580fSSatish Balay } 1622faf6580fSSatish Balay work = a->mult_work; 1623faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1624faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1625faf6580fSSatish Balay ncols = n*bs; 1626faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1627faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1628faf6580fSSatish Balay v += n*bs2; 1629faf6580fSSatish Balay x += bs; 1630faf6580fSSatish Balay workt = work; 1631faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1632faf6580fSSatish Balay zb = z + bs*(*idx++); 1633faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1634faf6580fSSatish Balay workt += bs; 1635faf6580fSSatish Balay } 1636faf6580fSSatish Balay } 1637faf6580fSSatish Balay } 1638faf6580fSSatish Balay } 1639faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1640faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1641faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 16422593348eSBarry Smith return 0; 16432593348eSBarry Smith } 16442593348eSBarry Smith 16455615d1e5SSatish Balay #undef __FUNC__ 1646d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" 16478f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 16482593348eSBarry Smith { 1649b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16504e220ebcSLois Curfman McInnes 16514e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 16524e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 16534e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 16544e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 16554e220ebcSLois Curfman McInnes info->block_size = a->bs2; 16564e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 16574e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 16584e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 16594e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 16604e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 16614e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 16624e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 16634e220ebcSLois Curfman McInnes info->memory = A->mem; 16644e220ebcSLois Curfman McInnes if (A->factor) { 16654e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 16664e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 16674e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 16684e220ebcSLois Curfman McInnes } else { 16694e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 16704e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16714e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16724e220ebcSLois Curfman McInnes } 16732593348eSBarry Smith return 0; 16742593348eSBarry Smith } 16752593348eSBarry Smith 16765615d1e5SSatish Balay #undef __FUNC__ 16775615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 16788f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 167991d316f6SSatish Balay { 168091d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 168191d316f6SSatish Balay 1682e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 168391d316f6SSatish Balay 168491d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 168591d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1686a7c10996SSatish Balay (a->nz != b->nz)) { 168791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 168891d316f6SSatish Balay } 168991d316f6SSatish Balay 169091d316f6SSatish Balay /* if the a->i are the same */ 169191d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 169291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169391d316f6SSatish Balay } 169491d316f6SSatish Balay 169591d316f6SSatish Balay /* if a->j are the same */ 169691d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 169791d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169891d316f6SSatish Balay } 169991d316f6SSatish Balay 170091d316f6SSatish Balay /* if a->a are the same */ 170191d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 170291d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 170391d316f6SSatish Balay } 170491d316f6SSatish Balay *flg = PETSC_TRUE; 170591d316f6SSatish Balay return 0; 170691d316f6SSatish Balay 170791d316f6SSatish Balay } 170891d316f6SSatish Balay 17095615d1e5SSatish Balay #undef __FUNC__ 17105615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 17118f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 171291d316f6SSatish Balay { 171391d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17147e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 171517e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 171617e48fc4SSatish Balay 17170513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 171817e48fc4SSatish Balay bs = a->bs; 171917e48fc4SSatish Balay aa = a->a; 172017e48fc4SSatish Balay ai = a->i; 172117e48fc4SSatish Balay aj = a->j; 172217e48fc4SSatish Balay ambs = a->mbs; 17239df24120SSatish Balay bs2 = a->bs2; 172491d316f6SSatish Balay 172591d316f6SSatish Balay VecSet(&zero,v); 172690f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1727e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 172817e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 172917e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 173017e48fc4SSatish Balay if (aj[j] == i) { 173117e48fc4SSatish Balay row = i*bs; 17327e67e3f9SSatish Balay aa_j = aa+j*bs2; 17337e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 173491d316f6SSatish Balay break; 173591d316f6SSatish Balay } 173691d316f6SSatish Balay } 173791d316f6SSatish Balay } 173891d316f6SSatish Balay return 0; 173991d316f6SSatish Balay } 174091d316f6SSatish Balay 17415615d1e5SSatish Balay #undef __FUNC__ 17425615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 17438f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 17449867e207SSatish Balay { 17459867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17469867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 17477e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 17489867e207SSatish Balay 17499867e207SSatish Balay ai = a->i; 17509867e207SSatish Balay aj = a->j; 17519867e207SSatish Balay aa = a->a; 17529867e207SSatish Balay m = a->m; 17539867e207SSatish Balay n = a->n; 17549867e207SSatish Balay bs = a->bs; 17559867e207SSatish Balay mbs = a->mbs; 17569df24120SSatish Balay bs2 = a->bs2; 17579867e207SSatish Balay if (ll) { 175890f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1759e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 17609867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17619867e207SSatish Balay M = ai[i+1] - ai[i]; 17629867e207SSatish Balay li = l + i*bs; 17637e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17649867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17657e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 17669867e207SSatish Balay (*v++) *= li[k%bs]; 17679867e207SSatish Balay } 17689867e207SSatish Balay } 17699867e207SSatish Balay } 17709867e207SSatish Balay } 17719867e207SSatish Balay 17729867e207SSatish Balay if (rr) { 177390f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1774e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 17759867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17769867e207SSatish Balay M = ai[i+1] - ai[i]; 17777e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17789867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17799867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 17809867e207SSatish Balay for ( k=0; k<bs; k++ ) { 17819867e207SSatish Balay x = ri[k]; 17829867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 17839867e207SSatish Balay } 17849867e207SSatish Balay } 17859867e207SSatish Balay } 17869867e207SSatish Balay } 17879867e207SSatish Balay return 0; 17889867e207SSatish Balay } 17899867e207SSatish Balay 17909867e207SSatish Balay 1791b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1792b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 17932a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1794736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1795736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 17961a6a6d98SBarry Smith 17971a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 17981a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 180348e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 18041a6a6d98SBarry Smith 18057fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 18067fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 18112593348eSBarry Smith 18125615d1e5SSatish Balay #undef __FUNC__ 18135615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 18148f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 18152593348eSBarry Smith { 1816b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18172593348eSBarry Smith Scalar *v = a->a; 18182593348eSBarry Smith double sum = 0.0; 18199df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 18202593348eSBarry Smith 18212593348eSBarry Smith if (type == NORM_FROBENIUS) { 18220419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 18232593348eSBarry Smith #if defined(PETSC_COMPLEX) 18242593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 18252593348eSBarry Smith #else 18262593348eSBarry Smith sum += (*v)*(*v); v++; 18272593348eSBarry Smith #endif 18282593348eSBarry Smith } 18292593348eSBarry Smith *norm = sqrt(sum); 18302593348eSBarry Smith } 18312593348eSBarry Smith else { 1832e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 18332593348eSBarry Smith } 18342593348eSBarry Smith return 0; 18352593348eSBarry Smith } 18362593348eSBarry Smith 18373eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 18382593348eSBarry Smith /* 18392593348eSBarry Smith note: This can only work for identity for row and col. It would 18402593348eSBarry Smith be good to check this and otherwise generate an error. 18412593348eSBarry Smith */ 18425615d1e5SSatish Balay #undef __FUNC__ 18435615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 18448f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 18452593348eSBarry Smith { 1846b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18472593348eSBarry Smith Mat outA; 1848de6a44a3SBarry Smith int ierr; 18492593348eSBarry Smith 1850e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 18512593348eSBarry Smith 18522593348eSBarry Smith outA = inA; 18532593348eSBarry Smith inA->factor = FACTOR_LU; 18542593348eSBarry Smith a->row = row; 18552593348eSBarry Smith a->col = col; 18562593348eSBarry Smith 1857eed86810SBarry Smith if (!a->solve_work) { 1858de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1859eed86810SBarry Smith PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1860eed86810SBarry Smith } 18612593348eSBarry Smith 18622593348eSBarry Smith if (!a->diag) { 1863b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 18642593348eSBarry Smith } 18657fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 18663eee16b0SBarry Smith 18673eee16b0SBarry Smith /* 18683eee16b0SBarry Smith Blocksize 4 has a special faster solver for ILU(0) factorization 18693eee16b0SBarry Smith with natural ordering 18703eee16b0SBarry Smith */ 18713eee16b0SBarry Smith if (a->bs == 4) { 18723eee16b0SBarry Smith inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 18733eee16b0SBarry Smith } 18743eee16b0SBarry Smith 18752593348eSBarry Smith return 0; 18762593348eSBarry Smith } 18772593348eSBarry Smith 18785615d1e5SSatish Balay #undef __FUNC__ 18795615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 18808f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 18812593348eSBarry Smith { 1882b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18839df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1884b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1885b6490206SBarry Smith PLogFlops(totalnz); 18862593348eSBarry Smith return 0; 18872593348eSBarry Smith } 18882593348eSBarry Smith 18895615d1e5SSatish Balay #undef __FUNC__ 18905615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 18918f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 18927e67e3f9SSatish Balay { 18937e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18947e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1895a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 18969df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 18977e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 18987e67e3f9SSatish Balay 18997e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 19007e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1901e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1902e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1903a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 19047e67e3f9SSatish Balay nrow = ailen[brow]; 19057e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1906e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1907e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1908a7c10996SSatish Balay col = in[l] ; 19097e67e3f9SSatish Balay bcol = col/bs; 19107e67e3f9SSatish Balay cidx = col%bs; 19117e67e3f9SSatish Balay ridx = row%bs; 19127e67e3f9SSatish Balay high = nrow; 19137e67e3f9SSatish Balay low = 0; /* assume unsorted */ 19147e67e3f9SSatish Balay while (high-low > 5) { 19157e67e3f9SSatish Balay t = (low+high)/2; 19167e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 19177e67e3f9SSatish Balay else low = t; 19187e67e3f9SSatish Balay } 19197e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 19207e67e3f9SSatish Balay if (rp[i] > bcol) break; 19217e67e3f9SSatish Balay if (rp[i] == bcol) { 19227e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 19237e67e3f9SSatish Balay goto finished; 19247e67e3f9SSatish Balay } 19257e67e3f9SSatish Balay } 19267e67e3f9SSatish Balay *v++ = zero; 19277e67e3f9SSatish Balay finished:; 19287e67e3f9SSatish Balay } 19297e67e3f9SSatish Balay } 19307e67e3f9SSatish Balay return 0; 19317e67e3f9SSatish Balay } 19327e67e3f9SSatish Balay 19335615d1e5SSatish Balay #undef __FUNC__ 1934d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 19358f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 19365a838052SSatish Balay { 19375a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 19385a838052SSatish Balay *bs = baij->bs; 19395a838052SSatish Balay return 0; 19405a838052SSatish Balay } 19415a838052SSatish Balay 1942d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 19435615d1e5SSatish Balay #undef __FUNC__ 19445615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1945d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1946d9b7c43dSSatish Balay { 1947d9b7c43dSSatish Balay int i,row; 1948d9b7c43dSSatish Balay row = idx[0]; 1949d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1950d9b7c43dSSatish Balay 1951d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1952d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1953d9b7c43dSSatish Balay } 1954d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1955d9b7c43dSSatish Balay return 0; 1956d9b7c43dSSatish Balay } 1957d9b7c43dSSatish Balay 19585615d1e5SSatish Balay #undef __FUNC__ 19595615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 19608f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1961d9b7c43dSSatish Balay { 1962d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1963d9b7c43dSSatish Balay IS is_local; 1964d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1965d9b7c43dSSatish Balay PetscTruth flg; 1966d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1967d9b7c43dSSatish Balay 1968d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1969d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1970d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1971537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1972d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1973d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1974d9b7c43dSSatish Balay 1975d9b7c43dSSatish Balay i = 0; 1976d9b7c43dSSatish Balay while (i < is_n) { 1977e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1978d9b7c43dSSatish Balay flg = PETSC_FALSE; 1979d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1980d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1981d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1982d9b7c43dSSatish Balay i += bs; 1983d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1984d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1985d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1986d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1987d9b7c43dSSatish Balay aa[0] = zero; 1988d9b7c43dSSatish Balay aa+=bs; 1989d9b7c43dSSatish Balay } 1990d9b7c43dSSatish Balay i++; 1991d9b7c43dSSatish Balay } 1992d9b7c43dSSatish Balay } 1993d9b7c43dSSatish Balay if (diag) { 1994d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1995d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1996d9b7c43dSSatish Balay } 1997d9b7c43dSSatish Balay } 1998d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1999d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2000d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 20019a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2002d9b7c43dSSatish Balay 2003d9b7c43dSSatish Balay return 0; 2004d9b7c43dSSatish Balay } 20051c351548SSatish Balay 20065615d1e5SSatish Balay #undef __FUNC__ 2007d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2008ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 2009ba4ca20aSSatish Balay { 2010ba4ca20aSSatish Balay static int called = 0; 2011ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 2012ba4ca20aSSatish Balay 2013ba4ca20aSSatish Balay if (called) return 0; else called = 1; 2014ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2015ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 2016ba4ca20aSSatish Balay return 0; 2017ba4ca20aSSatish Balay } 2018d9b7c43dSSatish Balay 20192593348eSBarry Smith /* -------------------------------------------------------------------*/ 2020cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 20219867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2022f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2023faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 20241a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 2025b6490206SBarry Smith 0,0, 2026de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 2027b6490206SBarry Smith 0, 2028f2501298SSatish Balay MatTranspose_SeqBAIJ, 202917e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 20309867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2031584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 2032b6490206SBarry Smith 0, 2033d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 20347fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2035b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2036de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 2037d402145bSBarry Smith 0,0, 2038b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 2039b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 2040af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 20417e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 2042ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 20433b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 20443b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 204592c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 204692c4ed94SBarry Smith 0,0,0,0,0,0, 204792c4ed94SBarry Smith MatSetValuesBlocked_SeqBAIJ}; 20482593348eSBarry Smith 20495615d1e5SSatish Balay #undef __FUNC__ 20505615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 20512593348eSBarry Smith /*@C 205244cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 205344cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 205444cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 20552bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 20562bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 20572593348eSBarry Smith 20582593348eSBarry Smith Input Parameters: 2059029af93fSBarry Smith . comm - MPI communicator, set to PETSC_COMM_SELF 2060b6490206SBarry Smith . bs - size of block 20612593348eSBarry Smith . m - number of rows 20622593348eSBarry Smith . n - number of columns 2063b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 20642bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 20652bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 20662593348eSBarry Smith 20672593348eSBarry Smith Output Parameter: 20682593348eSBarry Smith . A - the matrix 20692593348eSBarry Smith 20700513a670SBarry Smith Options Database Keys: 20710513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 2072*0effe34fSLois Curfman McInnes $ block calculations (much slower) 20730513a670SBarry Smith $ -mat_block_size - size of the blocks to use 20740513a670SBarry Smith 20752593348eSBarry Smith Notes: 207644cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 20772593348eSBarry Smith storage. That is, the stored row and column indices can begin at 207844cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 20792593348eSBarry Smith 20802593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 20812593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 20822593348eSBarry Smith allocation. For additional details, see the users manual chapter on 20836da5968aSLois Curfman McInnes matrices. 20842593348eSBarry Smith 208544cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 20862593348eSBarry Smith @*/ 2087b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 20882593348eSBarry Smith { 20892593348eSBarry Smith Mat B; 2090b6490206SBarry Smith Mat_SeqBAIJ *b; 20913b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 20923b2fbd54SBarry Smith 20933b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 2094e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2095b6490206SBarry Smith 20960513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 20970513a670SBarry Smith 2098f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 2099e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 21002593348eSBarry Smith 21012593348eSBarry Smith *A = 0; 2102d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 21032593348eSBarry Smith PLogObjectCreate(B); 2104b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 210544cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 21062593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 21071a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 21081a6a6d98SBarry Smith if (!flg) { 21097fc0212eSBarry Smith switch (bs) { 21107fc0212eSBarry Smith case 1: 21117fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21121a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 211339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 2114f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 21157fc0212eSBarry Smith break; 21164eeb42bcSBarry Smith case 2: 21174eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 21181a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 211939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 2120f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 21216d84be18SBarry Smith break; 2122f327dd97SBarry Smith case 3: 2123f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 21241a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 212539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 2126f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 21274eeb42bcSBarry Smith break; 21286d84be18SBarry Smith case 4: 21296d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 21301a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 213139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 2132f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 21336d84be18SBarry Smith break; 21346d84be18SBarry Smith case 5: 21356d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 21361a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 213739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 2138f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 21396d84be18SBarry Smith break; 214048e9ddb2SSatish Balay case 7: 214148e9ddb2SSatish Balay B->ops.mult = MatMult_SeqBAIJ_7; 214248e9ddb2SSatish Balay B->ops.solve = MatSolve_SeqBAIJ_7; 214348e9ddb2SSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_7; 214448e9ddb2SSatish Balay break; 21457fc0212eSBarry Smith } 21461a6a6d98SBarry Smith } 2147b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 2148b6490206SBarry Smith B->view = MatView_SeqBAIJ; 21492593348eSBarry Smith B->factor = 0; 21502593348eSBarry Smith B->lupivotthreshold = 1.0; 215190f02eecSBarry Smith B->mapping = 0; 21522593348eSBarry Smith b->row = 0; 21532593348eSBarry Smith b->col = 0; 21542593348eSBarry Smith b->reallocs = 0; 21552593348eSBarry Smith 215644cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 215744cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 2158b6490206SBarry Smith b->mbs = mbs; 2159f2501298SSatish Balay b->nbs = nbs; 2160b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 21612593348eSBarry Smith if (nnz == PETSC_NULL) { 2162119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 21632593348eSBarry Smith else if (nz <= 0) nz = 1; 2164b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2165b6490206SBarry Smith nz = nz*mbs; 21662593348eSBarry Smith } 21672593348eSBarry Smith else { 21682593348eSBarry Smith nz = 0; 2169b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 21702593348eSBarry Smith } 21712593348eSBarry Smith 21722593348eSBarry Smith /* allocate the matrix space */ 21737e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 21742593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 21757e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 21767e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 21772593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 21782593348eSBarry Smith b->i = b->j + nz; 21792593348eSBarry Smith b->singlemalloc = 1; 21802593348eSBarry Smith 2181de6a44a3SBarry Smith b->i[0] = 0; 2182b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 21832593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 21842593348eSBarry Smith } 21852593348eSBarry Smith 2186b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2187b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2188f09e8eb9SSatish Balay PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2189b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 21902593348eSBarry Smith 2191b6490206SBarry Smith b->bs = bs; 21929df24120SSatish Balay b->bs2 = bs2; 2193b6490206SBarry Smith b->mbs = mbs; 21942593348eSBarry Smith b->nz = 0; 219518e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 21962593348eSBarry Smith b->sorted = 0; 21972593348eSBarry Smith b->roworiented = 1; 21982593348eSBarry Smith b->nonew = 0; 21992593348eSBarry Smith b->diag = 0; 22002593348eSBarry Smith b->solve_work = 0; 2201de6a44a3SBarry Smith b->mult_work = 0; 22022593348eSBarry Smith b->spptr = 0; 22034e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 22044e220ebcSLois Curfman McInnes 22052593348eSBarry Smith *A = B; 22062593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 22072593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 22082593348eSBarry Smith return 0; 22092593348eSBarry Smith } 22102593348eSBarry Smith 22115615d1e5SSatish Balay #undef __FUNC__ 22125615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2213b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 22142593348eSBarry Smith { 22152593348eSBarry Smith Mat C; 2216b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 22179df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2218de6a44a3SBarry Smith 2219e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 22202593348eSBarry Smith 22212593348eSBarry Smith *B = 0; 2222d4bb536fSBarry Smith PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 22232593348eSBarry Smith PLogObjectCreate(C); 2224b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 22252593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2226b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 2227b6490206SBarry Smith C->view = MatView_SeqBAIJ; 22282593348eSBarry Smith C->factor = A->factor; 22292593348eSBarry Smith c->row = 0; 22302593348eSBarry Smith c->col = 0; 22312593348eSBarry Smith C->assembled = PETSC_TRUE; 22322593348eSBarry Smith 223344cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 223444cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 223544cd7ae7SLois Curfman McInnes C->M = a->m; 223644cd7ae7SLois Curfman McInnes C->N = a->n; 223744cd7ae7SLois Curfman McInnes 2238b6490206SBarry Smith c->bs = a->bs; 22399df24120SSatish Balay c->bs2 = a->bs2; 2240b6490206SBarry Smith c->mbs = a->mbs; 2241b6490206SBarry Smith c->nbs = a->nbs; 22422593348eSBarry Smith 2243b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2244b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2245b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22462593348eSBarry Smith c->imax[i] = a->imax[i]; 22472593348eSBarry Smith c->ilen[i] = a->ilen[i]; 22482593348eSBarry Smith } 22492593348eSBarry Smith 22502593348eSBarry Smith /* allocate the matrix space */ 22512593348eSBarry Smith c->singlemalloc = 1; 22527e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 22532593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 22547e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 2255de6a44a3SBarry Smith c->i = c->j + nz; 2256b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2257b6490206SBarry Smith if (mbs > 0) { 2258de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 22592593348eSBarry Smith if (cpvalues == COPY_VALUES) { 22607e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 22612593348eSBarry Smith } 22622593348eSBarry Smith } 22632593348eSBarry Smith 2264f09e8eb9SSatish Balay PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 22652593348eSBarry Smith c->sorted = a->sorted; 22662593348eSBarry Smith c->roworiented = a->roworiented; 22672593348eSBarry Smith c->nonew = a->nonew; 22682593348eSBarry Smith 22692593348eSBarry Smith if (a->diag) { 2270b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2271b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2272b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22732593348eSBarry Smith c->diag[i] = a->diag[i]; 22742593348eSBarry Smith } 22752593348eSBarry Smith } 22762593348eSBarry Smith else c->diag = 0; 22772593348eSBarry Smith c->nz = a->nz; 22782593348eSBarry Smith c->maxnz = a->maxnz; 22792593348eSBarry Smith c->solve_work = 0; 22802593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 22817fc0212eSBarry Smith c->mult_work = 0; 22822593348eSBarry Smith *B = C; 22832593348eSBarry Smith return 0; 22842593348eSBarry Smith } 22852593348eSBarry Smith 22865615d1e5SSatish Balay #undef __FUNC__ 22875615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 228819bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 22892593348eSBarry Smith { 2290b6490206SBarry Smith Mat_SeqBAIJ *a; 22912593348eSBarry Smith Mat B; 2292de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2293b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 229435aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2295a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2296b6490206SBarry Smith Scalar *aa; 229719bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 22982593348eSBarry Smith 22991a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2300de6a44a3SBarry Smith bs2 = bs*bs; 2301b6490206SBarry Smith 23022593348eSBarry Smith MPI_Comm_size(comm,&size); 2303e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 230490ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 230577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2306e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 23072593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 23082593348eSBarry Smith 2309e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 231035aab85fSBarry Smith 231135aab85fSBarry Smith /* 231235aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 231335aab85fSBarry Smith divisible by the blocksize 231435aab85fSBarry Smith */ 2315b6490206SBarry Smith mbs = M/bs; 231635aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 231735aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 231835aab85fSBarry Smith else mbs++; 231935aab85fSBarry Smith if (extra_rows) { 2320537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 232135aab85fSBarry Smith } 2322b6490206SBarry Smith 23232593348eSBarry Smith /* read in row lengths */ 232435aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 232577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 232635aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 23272593348eSBarry Smith 2328b6490206SBarry Smith /* read in column indices */ 232935aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 233077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 233135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2332b6490206SBarry Smith 2333b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2334b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2335b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 233635aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 233735aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 233835aab85fSBarry Smith masked = mask + mbs; 2339b6490206SBarry Smith rowcount = 0; nzcount = 0; 2340b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 234135aab85fSBarry Smith nmask = 0; 2342b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2343b6490206SBarry Smith kmax = rowlengths[rowcount]; 2344b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 234535aab85fSBarry Smith tmp = jj[nzcount++]/bs; 234635aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2347b6490206SBarry Smith } 2348b6490206SBarry Smith rowcount++; 2349b6490206SBarry Smith } 235035aab85fSBarry Smith browlengths[i] += nmask; 235135aab85fSBarry Smith /* zero out the mask elements we set */ 235235aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2353b6490206SBarry Smith } 2354b6490206SBarry Smith 23552593348eSBarry Smith /* create our matrix */ 23563eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 23572593348eSBarry Smith B = *A; 2358b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 23592593348eSBarry Smith 23602593348eSBarry Smith /* set matrix "i" values */ 2361de6a44a3SBarry Smith a->i[0] = 0; 2362b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2363b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2364b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 23652593348eSBarry Smith } 2366b6490206SBarry Smith a->nz = 0; 2367b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 23682593348eSBarry Smith 2369b6490206SBarry Smith /* read in nonzero values */ 237035aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 237177c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 237235aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2373b6490206SBarry Smith 2374b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2375b6490206SBarry Smith nzcount = 0; jcount = 0; 2376b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2377b6490206SBarry Smith nzcountb = nzcount; 237835aab85fSBarry Smith nmask = 0; 2379b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2380b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2381b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 238235aab85fSBarry Smith tmp = jj[nzcount++]/bs; 238335aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2384b6490206SBarry Smith } 2385b6490206SBarry Smith rowcount++; 2386b6490206SBarry Smith } 2387de6a44a3SBarry Smith /* sort the masked values */ 238877c4ece6SBarry Smith PetscSortInt(nmask,masked); 2389de6a44a3SBarry Smith 2390b6490206SBarry Smith /* set "j" values into matrix */ 2391b6490206SBarry Smith maskcount = 1; 239235aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 239335aab85fSBarry Smith a->j[jcount++] = masked[j]; 2394de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2395b6490206SBarry Smith } 2396b6490206SBarry Smith /* set "a" values into matrix */ 2397de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2398b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2399b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2400b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2401de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2402de6a44a3SBarry Smith block = mask[tmp] - 1; 2403de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2404de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2405b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2406b6490206SBarry Smith } 2407b6490206SBarry Smith } 240835aab85fSBarry Smith /* zero out the mask elements we set */ 240935aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2410b6490206SBarry Smith } 2411e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2412b6490206SBarry Smith 2413b6490206SBarry Smith PetscFree(rowlengths); 2414b6490206SBarry Smith PetscFree(browlengths); 2415b6490206SBarry Smith PetscFree(aa); 2416b6490206SBarry Smith PetscFree(jj); 2417b6490206SBarry Smith PetscFree(mask); 2418b6490206SBarry Smith 2419b6490206SBarry Smith B->assembled = PETSC_TRUE; 2420de6a44a3SBarry Smith 2421de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2422de6a44a3SBarry Smith if (flg) { 242319bcc07fSBarry Smith Viewer tviewer; 242419bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2425639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 242619bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 242719bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2428de6a44a3SBarry Smith } 2429de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2430de6a44a3SBarry Smith if (flg) { 243119bcc07fSBarry Smith Viewer tviewer; 243219bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2433639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 243419bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 243519bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2436de6a44a3SBarry Smith } 2437de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2438de6a44a3SBarry Smith if (flg) { 243919bcc07fSBarry Smith Viewer tviewer; 244019bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 244119bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 244219bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2443de6a44a3SBarry Smith } 2444de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2445de6a44a3SBarry Smith if (flg) { 244619bcc07fSBarry Smith Viewer tviewer; 244719bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2448639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 244919bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 245019bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2451de6a44a3SBarry Smith } 2452de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2453de6a44a3SBarry Smith if (flg) { 245419bcc07fSBarry Smith Viewer tviewer; 245519bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 245619bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 245719bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 245819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2459de6a44a3SBarry Smith } 24602593348eSBarry Smith return 0; 24612593348eSBarry Smith } 24622593348eSBarry Smith 24632593348eSBarry Smith 24642593348eSBarry Smith 2465