1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 2*0752156aSBarry Smith static char vcid[] = "$Id: baij.c,v 1.111 1997/08/29 17:15:56 curfman Exp bsmith $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 93369ce9aSBarry Smith 103369ce9aSBarry Smith #include "pinclude/pviewer.h" 113369ce9aSBarry Smith #include "sys.h" 1270f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 131a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 141a6a6d98SBarry Smith #include "src/inline/spops.h" 152593348eSBarry Smith #include "petsc.h" 163b547af2SSatish Balay 172593348eSBarry Smith 18de6a44a3SBarry Smith /* 19de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 20de6a44a3SBarry Smith */ 21de6a44a3SBarry Smith 225615d1e5SSatish Balay #undef __FUNC__ 235615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 25de6a44a3SBarry Smith { 26de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 277fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 28de6a44a3SBarry Smith 29de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 317fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 32de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33de6a44a3SBarry Smith if (a->j[j] == i) { 34de6a44a3SBarry Smith diag[i] = j; 35de6a44a3SBarry Smith break; 36de6a44a3SBarry Smith } 37de6a44a3SBarry Smith } 38de6a44a3SBarry Smith } 39de6a44a3SBarry Smith a->diag = diag; 40de6a44a3SBarry Smith return 0; 41de6a44a3SBarry Smith } 422593348eSBarry Smith 432593348eSBarry Smith 443b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 453b2fbd54SBarry Smith 465615d1e5SSatish Balay #undef __FUNC__ 475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 483b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 493b2fbd54SBarry Smith PetscTruth *done) 503b2fbd54SBarry Smith { 513b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 523b2fbd54SBarry Smith int ierr, n = a->mbs,i; 533b2fbd54SBarry Smith 543b2fbd54SBarry Smith *nn = n; 553b2fbd54SBarry Smith if (!ia) return 0; 563b2fbd54SBarry Smith if (symmetric) { 573b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 583b2fbd54SBarry Smith } else if (oshift == 1) { 593b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 603b2fbd54SBarry Smith int nz = a->i[n] + 1; 613b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 623b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 633b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 643b2fbd54SBarry Smith } else { 653b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 663b2fbd54SBarry Smith } 673b2fbd54SBarry Smith 683b2fbd54SBarry Smith return 0; 693b2fbd54SBarry Smith } 703b2fbd54SBarry Smith 715615d1e5SSatish Balay #undef __FUNC__ 72d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 733b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 743b2fbd54SBarry Smith PetscTruth *done) 753b2fbd54SBarry Smith { 763b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773b2fbd54SBarry Smith int i,n = a->mbs; 783b2fbd54SBarry Smith 793b2fbd54SBarry Smith if (!ia) return 0; 803b2fbd54SBarry Smith if (symmetric) { 813b2fbd54SBarry Smith PetscFree(*ia); 823b2fbd54SBarry Smith PetscFree(*ja); 83af5da2bfSBarry Smith } else if (oshift == 1) { 843b2fbd54SBarry Smith int nz = a->i[n]; 853b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 863b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 873b2fbd54SBarry Smith } 883b2fbd54SBarry Smith return 0; 893b2fbd54SBarry Smith } 903b2fbd54SBarry Smith 913b2fbd54SBarry Smith 925615d1e5SSatish Balay #undef __FUNC__ 93d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" 94b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 952593348eSBarry Smith { 96b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 979df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98b6490206SBarry Smith Scalar *aa; 992593348eSBarry Smith 10090ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1012593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1022593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1033b2fbd54SBarry Smith 1042593348eSBarry Smith col_lens[1] = a->m; 1052593348eSBarry Smith col_lens[2] = a->n; 1067e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1072593348eSBarry Smith 1082593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 109b6490206SBarry Smith count = 0; 110b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 111b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 112b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113b6490206SBarry Smith } 1142593348eSBarry Smith } 115*0752156aSBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 1162593348eSBarry Smith PetscFree(col_lens); 1172593348eSBarry Smith 1182593348eSBarry Smith /* store column indices (zero start index) */ 11966963ce1SSatish Balay jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120b6490206SBarry Smith count = 0; 121b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 122b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 123b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 125b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1262593348eSBarry Smith } 1272593348eSBarry Smith } 128b6490206SBarry Smith } 129b6490206SBarry Smith } 130*0752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 131b6490206SBarry Smith PetscFree(jj); 1322593348eSBarry Smith 1332593348eSBarry Smith /* store nonzero values */ 13466963ce1SSatish Balay aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135b6490206SBarry Smith count = 0; 136b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 137b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 138b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1407e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 141b6490206SBarry Smith } 142b6490206SBarry Smith } 143b6490206SBarry Smith } 144b6490206SBarry Smith } 145*0752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 146b6490206SBarry Smith PetscFree(aa); 1472593348eSBarry Smith return 0; 1482593348eSBarry Smith } 1492593348eSBarry Smith 1505615d1e5SSatish Balay #undef __FUNC__ 151d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1532593348eSBarry Smith { 154b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1559df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1562593348eSBarry Smith FILE *fd; 1572593348eSBarry Smith char *outputname; 1582593348eSBarry Smith 15990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1602593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 162639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1637ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1642593348eSBarry Smith } 165639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1672593348eSBarry Smith } 168639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 16944cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17044cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17144cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17244cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17344cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 175766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17744cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 18144cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18344cd7ae7SLois Curfman McInnes #else 18444cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18544cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18644cd7ae7SLois Curfman McInnes #endif 18744cd7ae7SLois Curfman McInnes } 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19044cd7ae7SLois Curfman McInnes } 19144cd7ae7SLois Curfman McInnes } 19244cd7ae7SLois Curfman McInnes } 1932593348eSBarry Smith else { 194b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 195b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 196b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 197b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 19988685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 200766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 20188685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2027e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20388685aaeSLois Curfman McInnes } 204766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207766eeae4SLois Curfman McInnes } 20888685aaeSLois Curfman McInnes else { 2097e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 21088685aaeSLois Curfman McInnes } 21188685aaeSLois Curfman McInnes #else 2127e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 21388685aaeSLois Curfman McInnes #endif 2142593348eSBarry Smith } 2152593348eSBarry Smith } 2162593348eSBarry Smith fprintf(fd,"\n"); 2172593348eSBarry Smith } 2182593348eSBarry Smith } 219b6490206SBarry Smith } 2202593348eSBarry Smith fflush(fd); 2212593348eSBarry Smith return 0; 2222593348eSBarry Smith } 2232593348eSBarry Smith 2245615d1e5SSatish Balay #undef __FUNC__ 225d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" 2263270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2273270192aSSatish Balay { 2283270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2293270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2303270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2313270192aSSatish Balay Scalar *aa; 2323270192aSSatish Balay Draw draw; 2333270192aSSatish Balay DrawButton button; 2343270192aSSatish Balay PetscTruth isnull; 2353270192aSSatish Balay 2363270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2373270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2383270192aSSatish Balay 2393270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2403270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2413270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2423270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2433270192aSSatish Balay color = DRAW_BLUE; 2443270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2453270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2463270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2473270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2483270192aSSatish Balay aa = a->a + j*bs2; 2493270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2503270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2513270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 252b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2533270192aSSatish Balay } 2543270192aSSatish Balay } 2553270192aSSatish Balay } 2563270192aSSatish Balay } 2573270192aSSatish Balay color = DRAW_CYAN; 2583270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2593270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2603270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2613270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2623270192aSSatish Balay aa = a->a + j*bs2; 2633270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2643270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2653270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 266b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2673270192aSSatish Balay } 2683270192aSSatish Balay } 2693270192aSSatish Balay } 2703270192aSSatish Balay } 2713270192aSSatish Balay 2723270192aSSatish Balay color = DRAW_RED; 2733270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2743270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2753270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2763270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2773270192aSSatish Balay aa = a->a + j*bs2; 2783270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2793270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2803270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 281b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2823270192aSSatish Balay } 2833270192aSSatish Balay } 2843270192aSSatish Balay } 2853270192aSSatish Balay } 2863270192aSSatish Balay 2873270192aSSatish Balay DrawFlush(draw); 2883270192aSSatish Balay DrawGetPause(draw,&pause); 2893270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2903270192aSSatish Balay 2913270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2923270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2933270192aSSatish Balay while (button != BUTTON_RIGHT) { 2943270192aSSatish Balay DrawClear(draw); 2953270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2963270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2973270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2983270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2993270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 3003270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 3013270192aSSatish Balay w *= scale; h *= scale; 3023270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 3033270192aSSatish Balay color = DRAW_BLUE; 3043270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3053270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3063270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3073270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3083270192aSSatish Balay aa = a->a + j*bs2; 3093270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3103270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3113270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 312b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3133270192aSSatish Balay } 3143270192aSSatish Balay } 3153270192aSSatish Balay } 3163270192aSSatish Balay } 3173270192aSSatish Balay color = DRAW_CYAN; 3183270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3193270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3203270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3213270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3223270192aSSatish Balay aa = a->a + j*bs2; 3233270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3243270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3253270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 326b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3273270192aSSatish Balay } 3283270192aSSatish Balay } 3293270192aSSatish Balay } 3303270192aSSatish Balay } 3313270192aSSatish Balay 3323270192aSSatish Balay color = DRAW_RED; 3333270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3343270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3353270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3363270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3373270192aSSatish Balay aa = a->a + j*bs2; 3383270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3393270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3403270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 341b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3423270192aSSatish Balay } 3433270192aSSatish Balay } 3443270192aSSatish Balay } 3453270192aSSatish Balay } 3463270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3473270192aSSatish Balay } 3483270192aSSatish Balay return 0; 3493270192aSSatish Balay } 3503270192aSSatish Balay 3515615d1e5SSatish Balay #undef __FUNC__ 352d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ" 3538f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3542593348eSBarry Smith { 3552593348eSBarry Smith Mat A = (Mat) obj; 35619bcc07fSBarry Smith ViewerType vtype; 35719bcc07fSBarry Smith int ierr; 3582593348eSBarry Smith 35919bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36019bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 361e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 3622593348eSBarry Smith } 36319bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3652593348eSBarry Smith } 36619bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 367b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3682593348eSBarry Smith } 36919bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3703270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3712593348eSBarry Smith } 3722593348eSBarry Smith return 0; 3732593348eSBarry Smith } 374b6490206SBarry Smith 375119a934aSSatish Balay #define CHUNKSIZE 10 376cd0e1443SSatish Balay 3775615d1e5SSatish Balay #undef __FUNC__ 3785615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 379639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380cd0e1443SSatish Balay { 381cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3859df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 386cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 387cd0e1443SSatish Balay 388cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 389cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3903b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 391e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 392e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3933b2fbd54SBarry Smith #endif 3942c3acbe9SBarry Smith rp = aj + ai[brow]; 3952c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 3962c3acbe9SBarry Smith rmax = imax[brow]; 3972c3acbe9SBarry Smith nrow = ailen[brow]; 398cd0e1443SSatish Balay low = 0; 399cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 4003b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 401e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 4033b2fbd54SBarry Smith #endif 404a7c10996SSatish Balay col = in[l]; bcol = col/bs; 405cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 406cd0e1443SSatish Balay if (roworiented) { 407cd0e1443SSatish Balay value = *v++; 408cd0e1443SSatish Balay } 409cd0e1443SSatish Balay else { 410cd0e1443SSatish Balay value = v[k + l*m]; 411cd0e1443SSatish Balay } 412cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4132c3acbe9SBarry Smith while (high-low > 7) { 414cd0e1443SSatish Balay t = (low+high)/2; 415cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 416cd0e1443SSatish Balay else low = t; 417cd0e1443SSatish Balay } 418cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 419cd0e1443SSatish Balay if (rp[i] > bcol) break; 420cd0e1443SSatish Balay if (rp[i] == bcol) { 4217e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 422cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 423cd0e1443SSatish Balay else *bap = value; 424f1241b54SBarry Smith goto noinsert1; 425cd0e1443SSatish Balay } 426cd0e1443SSatish Balay } 427c2653b3dSLois Curfman McInnes if (nonew == 1) goto noinsert1; 42811ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429cd0e1443SSatish Balay if (nrow >= rmax) { 430cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 431cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432cd0e1443SSatish Balay Scalar *new_a; 433cd0e1443SSatish Balay 43411ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 43596854ed6SLois Curfman McInnes 43696854ed6SLois Curfman McInnes /* Malloc new storage space */ 4377e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4397e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 440cd0e1443SSatish Balay new_i = new_j + new_nz; 441cd0e1443SSatish Balay 442cd0e1443SSatish Balay /* copy over old data into new slots */ 443cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4447e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448cd0e1443SSatish Balay len*sizeof(int)); 449a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453cd0e1443SSatish Balay /* free up old matrix storage */ 454cd0e1443SSatish Balay PetscFree(a->a); 455cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457cd0e1443SSatish Balay a->singlemalloc = 1; 458cd0e1443SSatish Balay 459a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4617e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 46218e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 463cd0e1443SSatish Balay a->reallocs++; 464119a934aSSatish Balay a->nz++; 465cd0e1443SSatish Balay } 4667e67e3f9SSatish Balay N = nrow++ - 1; 467cd0e1443SSatish Balay /* shift up all the later entries in this row */ 468cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 469cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4707e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471cd0e1443SSatish Balay } 4727e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473cd0e1443SSatish Balay rp[i] = bcol; 4747e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 475f1241b54SBarry Smith noinsert1:; 476cd0e1443SSatish Balay low = i; 477cd0e1443SSatish Balay } 478cd0e1443SSatish Balay ailen[brow] = nrow; 479cd0e1443SSatish Balay } 480cd0e1443SSatish Balay return 0; 481cd0e1443SSatish Balay } 482cd0e1443SSatish Balay 4835615d1e5SSatish Balay #undef __FUNC__ 48405a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 48592c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 48692c4ed94SBarry Smith { 48792c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4888a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489dd9472c6SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 490dd9472c6SBarry Smith int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 4910e324ae4SSatish Balay Scalar *ap,*value=v,*aa=a->a,*bap; 49292c4ed94SBarry Smith 4930e324ae4SSatish Balay if (roworiented) { 4940e324ae4SSatish Balay stepval = (n-1)*bs; 4950e324ae4SSatish Balay } else { 4960e324ae4SSatish Balay stepval = (m-1)*bs; 4970e324ae4SSatish Balay } 49892c4ed94SBarry Smith for ( k=0; k<m; k++ ) { /* loop over added rows */ 49992c4ed94SBarry Smith row = im[k]; 50092c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 50192c4ed94SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 5028a84c255SSatish Balay if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 50392c4ed94SBarry Smith #endif 50492c4ed94SBarry Smith rp = aj + ai[row]; 50592c4ed94SBarry Smith ap = aa + bs2*ai[row]; 50692c4ed94SBarry Smith rmax = imax[row]; 50792c4ed94SBarry Smith nrow = ailen[row]; 50892c4ed94SBarry Smith low = 0; 50992c4ed94SBarry Smith for ( l=0; l<n; l++ ) { /* loop over added columns */ 51092c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 51192c4ed94SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 5128a84c255SSatish Balay if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 51392c4ed94SBarry Smith #endif 51492c4ed94SBarry Smith col = in[l]; 51592c4ed94SBarry Smith if (roworiented) { 5160e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 5170e324ae4SSatish Balay } else { 5180e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 51992c4ed94SBarry Smith } 52092c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 52192c4ed94SBarry Smith while (high-low > 7) { 52292c4ed94SBarry Smith t = (low+high)/2; 52392c4ed94SBarry Smith if (rp[t] > col) high = t; 52492c4ed94SBarry Smith else low = t; 52592c4ed94SBarry Smith } 52692c4ed94SBarry Smith for ( i=low; i<high; i++ ) { 52792c4ed94SBarry Smith if (rp[i] > col) break; 52892c4ed94SBarry Smith if (rp[i] == col) { 5298a84c255SSatish Balay bap = ap + bs2*i; 5300e324ae4SSatish Balay if (roworiented) { 5318a84c255SSatish Balay if (is == ADD_VALUES) { 532dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 533dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5348a84c255SSatish Balay bap[jj] += *value++; 535dd9472c6SBarry Smith } 536dd9472c6SBarry Smith } 5370e324ae4SSatish Balay } else { 538dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 539dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5400e324ae4SSatish Balay bap[jj] = *value++; 5418a84c255SSatish Balay } 542dd9472c6SBarry Smith } 543dd9472c6SBarry Smith } 5440e324ae4SSatish Balay } else { 5450e324ae4SSatish Balay if (is == ADD_VALUES) { 546dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 547dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5480e324ae4SSatish Balay *bap++ += *value++; 549dd9472c6SBarry Smith } 550dd9472c6SBarry Smith } 5510e324ae4SSatish Balay } else { 552dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 553dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5540e324ae4SSatish Balay *bap++ = *value++; 5550e324ae4SSatish Balay } 5568a84c255SSatish Balay } 557dd9472c6SBarry Smith } 558dd9472c6SBarry Smith } 559f1241b54SBarry Smith goto noinsert2; 56092c4ed94SBarry Smith } 56192c4ed94SBarry Smith } 56289280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 56311ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 56492c4ed94SBarry Smith if (nrow >= rmax) { 56592c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 56692c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 56792c4ed94SBarry Smith Scalar *new_a; 56892c4ed94SBarry Smith 56911ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 57089280ab3SLois Curfman McInnes 57192c4ed94SBarry Smith /* malloc new storage space */ 57292c4ed94SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 57392c4ed94SBarry Smith new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 57492c4ed94SBarry Smith new_j = (int *) (new_a + bs2*new_nz); 57592c4ed94SBarry Smith new_i = new_j + new_nz; 57692c4ed94SBarry Smith 57792c4ed94SBarry Smith /* copy over old data into new slots */ 57892c4ed94SBarry Smith for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 57992c4ed94SBarry Smith for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 58092c4ed94SBarry Smith PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 58192c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 582dd9472c6SBarry Smith PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 58392c4ed94SBarry Smith PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 58492c4ed94SBarry Smith PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 585dd9472c6SBarry Smith PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 58692c4ed94SBarry Smith /* free up old matrix storage */ 58792c4ed94SBarry Smith PetscFree(a->a); 58892c4ed94SBarry Smith if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 58992c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 59092c4ed94SBarry Smith a->singlemalloc = 1; 59192c4ed94SBarry Smith 59292c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 59392c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 59492c4ed94SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 59592c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 59692c4ed94SBarry Smith a->reallocs++; 59792c4ed94SBarry Smith a->nz++; 59892c4ed94SBarry Smith } 59992c4ed94SBarry Smith N = nrow++ - 1; 60092c4ed94SBarry Smith /* shift up all the later entries in this row */ 60192c4ed94SBarry Smith for ( ii=N; ii>=i; ii-- ) { 60292c4ed94SBarry Smith rp[ii+1] = rp[ii]; 60392c4ed94SBarry Smith PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 60492c4ed94SBarry Smith } 60592c4ed94SBarry Smith if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 60692c4ed94SBarry Smith rp[i] = col; 6078a84c255SSatish Balay bap = ap + bs2*i; 6080e324ae4SSatish Balay if (roworiented) { 609dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval) { 610dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 6110e324ae4SSatish Balay bap[jj] = *value++; 612dd9472c6SBarry Smith } 613dd9472c6SBarry Smith } 6140e324ae4SSatish Balay } else { 615dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 616dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 6170e324ae4SSatish Balay *bap++ = *value++; 6180e324ae4SSatish Balay } 619dd9472c6SBarry Smith } 620dd9472c6SBarry Smith } 621f1241b54SBarry Smith noinsert2:; 62292c4ed94SBarry Smith low = i; 62392c4ed94SBarry Smith } 62492c4ed94SBarry Smith ailen[row] = nrow; 62592c4ed94SBarry Smith } 62692c4ed94SBarry Smith return 0; 62792c4ed94SBarry Smith } 62892c4ed94SBarry Smith 62992c4ed94SBarry Smith #undef __FUNC__ 630d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" 6318f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 6320b824a48SSatish Balay { 6330b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 634*0752156aSBarry Smith if (m) *m = a->m; 635*0752156aSBarry Smith if (n) *n = a->n; 6360b824a48SSatish Balay return 0; 6370b824a48SSatish Balay } 6380b824a48SSatish Balay 6395615d1e5SSatish Balay #undef __FUNC__ 640d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 6418f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 6420b824a48SSatish Balay { 6430b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6440b824a48SSatish Balay *m = 0; *n = a->m; 6450b824a48SSatish Balay return 0; 6460b824a48SSatish Balay } 6470b824a48SSatish Balay 6485615d1e5SSatish Balay #undef __FUNC__ 6495615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 6509867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6519867e207SSatish Balay { 6529867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6537e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 6549867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 6559867e207SSatish Balay 6569867e207SSatish Balay bs = a->bs; 6579867e207SSatish Balay ai = a->i; 6589867e207SSatish Balay aj = a->j; 6599867e207SSatish Balay aa = a->a; 6609df24120SSatish Balay bs2 = a->bs2; 6619867e207SSatish Balay 662e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 6639867e207SSatish Balay 6649867e207SSatish Balay bn = row/bs; /* Block number */ 6659867e207SSatish Balay bp = row % bs; /* Block Position */ 6669867e207SSatish Balay M = ai[bn+1] - ai[bn]; 6679867e207SSatish Balay *nz = bs*M; 6689867e207SSatish Balay 6699867e207SSatish Balay if (v) { 6709867e207SSatish Balay *v = 0; 6719867e207SSatish Balay if (*nz) { 6729867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 6739867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6749867e207SSatish Balay v_i = *v + i*bs; 6757e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 6767e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 6779867e207SSatish Balay } 6789867e207SSatish Balay } 6799867e207SSatish Balay } 6809867e207SSatish Balay 6819867e207SSatish Balay if (idx) { 6829867e207SSatish Balay *idx = 0; 6839867e207SSatish Balay if (*nz) { 6849867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 6859867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6869867e207SSatish Balay idx_i = *idx + i*bs; 6879867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 6889867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 6899867e207SSatish Balay } 6909867e207SSatish Balay } 6919867e207SSatish Balay } 6929867e207SSatish Balay return 0; 6939867e207SSatish Balay } 6949867e207SSatish Balay 6955615d1e5SSatish Balay #undef __FUNC__ 696d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" 6979867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6989867e207SSatish Balay { 6999867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 7009867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 7019867e207SSatish Balay return 0; 7029867e207SSatish Balay } 703b6490206SBarry Smith 7045615d1e5SSatish Balay #undef __FUNC__ 7055615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 7068f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B) 707f2501298SSatish Balay { 708f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 709f2501298SSatish Balay Mat C; 710f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 7119df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 712f2501298SSatish Balay Scalar *array=a->a; 713f2501298SSatish Balay 714f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 715e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 716f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 717f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 718a7c10996SSatish Balay 719a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 720f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 721f2501298SSatish Balay PetscFree(col); 722f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 723f2501298SSatish Balay cols = rows + bs; 724f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 725f2501298SSatish Balay cols[0] = i*bs; 726f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 727f2501298SSatish Balay len = ai[i+1] - ai[i]; 728f2501298SSatish Balay for ( j=0; j<len; j++ ) { 729f2501298SSatish Balay rows[0] = (*aj++)*bs; 730f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 731f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 732f2501298SSatish Balay array += bs2; 733f2501298SSatish Balay } 734f2501298SSatish Balay } 7351073c447SSatish Balay PetscFree(rows); 736f2501298SSatish Balay 7376d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7386d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 739f2501298SSatish Balay 740f2501298SSatish Balay if (B != PETSC_NULL) { 741f2501298SSatish Balay *B = C; 742f2501298SSatish Balay } else { 743f2501298SSatish Balay /* This isn't really an in-place transpose */ 744f2501298SSatish Balay PetscFree(a->a); 745f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 746f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 747f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 748f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 749f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 750f2501298SSatish Balay PetscFree(a); 751f09e8eb9SSatish Balay PetscMemcpy(A,C,sizeof(struct _p_Mat)); 752f2501298SSatish Balay PetscHeaderDestroy(C); 753f2501298SSatish Balay } 754f2501298SSatish Balay return 0; 755f2501298SSatish Balay } 756f2501298SSatish Balay 757f2501298SSatish Balay 7585615d1e5SSatish Balay #undef __FUNC__ 7595615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 7608f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 761584200bdSSatish Balay { 762584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 763584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 764a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 76543ee02c3SBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 766584200bdSSatish Balay Scalar *aa = a->a, *ap; 767584200bdSSatish Balay 7686d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 769584200bdSSatish Balay 77043ee02c3SBarry Smith if (m) rmax = ailen[0]; 771584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 772584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 773584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 774d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 775584200bdSSatish Balay if (fshift) { 776a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 777584200bdSSatish Balay N = ailen[i]; 778584200bdSSatish Balay for ( j=0; j<N; j++ ) { 779584200bdSSatish Balay ip[j-fshift] = ip[j]; 7807e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 781584200bdSSatish Balay } 782584200bdSSatish Balay } 783584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 784584200bdSSatish Balay } 785584200bdSSatish Balay if (mbs) { 786584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 787584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 788584200bdSSatish Balay } 789584200bdSSatish Balay /* reset ilen and imax for each row */ 790584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 791584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 792584200bdSSatish Balay } 793a7c10996SSatish Balay a->nz = ai[mbs]; 794584200bdSSatish Balay 795584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 796584200bdSSatish Balay if (fshift && a->diag) { 797584200bdSSatish Balay PetscFree(a->diag); 798584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 799584200bdSSatish Balay a->diag = 0; 800584200bdSSatish Balay } 8013d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 802219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 8033d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 804584200bdSSatish Balay a->reallocs); 805d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 806e2f3b5e9SSatish Balay a->reallocs = 0; 8074e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 8084e220ebcSLois Curfman McInnes 809584200bdSSatish Balay return 0; 810584200bdSSatish Balay } 811584200bdSSatish Balay 8125615d1e5SSatish Balay #undef __FUNC__ 8135615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 8148f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A) 8152593348eSBarry Smith { 816b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8179df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 8182593348eSBarry Smith return 0; 8192593348eSBarry Smith } 8202593348eSBarry Smith 8215615d1e5SSatish Balay #undef __FUNC__ 822d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" 823b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 8242593348eSBarry Smith { 8252593348eSBarry Smith Mat A = (Mat) obj; 826b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8272593348eSBarry Smith 8282593348eSBarry Smith #if defined(PETSC_LOG) 8292593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 8302593348eSBarry Smith #endif 8312593348eSBarry Smith PetscFree(a->a); 8322593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 8332593348eSBarry Smith if (a->diag) PetscFree(a->diag); 8342593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 8352593348eSBarry Smith if (a->imax) PetscFree(a->imax); 8362593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 837de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 8382593348eSBarry Smith PetscFree(a); 839f2655603SLois Curfman McInnes PLogObjectDestroy(A); 840f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 8412593348eSBarry Smith return 0; 8422593348eSBarry Smith } 8432593348eSBarry Smith 8445615d1e5SSatish Balay #undef __FUNC__ 845d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" 8468f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op) 8472593348eSBarry Smith { 848b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8496d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 8506d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 8516d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 852219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 8536d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 854c2653b3dSLois Curfman McInnes else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 85596854ed6SLois Curfman McInnes else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 8566d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 8576d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 858219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 8596d4a8577SBarry Smith op == MAT_SYMMETRIC || 8606d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 86190f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 8622b362799SSatish Balay op == MAT_IGNORE_OFF_PROC_ENTRIES) 86394a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 8646d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 865e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 8662593348eSBarry Smith else 867e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 8682593348eSBarry Smith return 0; 8692593348eSBarry Smith } 8702593348eSBarry Smith 8712593348eSBarry Smith 8722593348eSBarry Smith /* -------------------------------------------------------*/ 8732593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 8742593348eSBarry Smith /* -------------------------------------------------------*/ 875b6490206SBarry Smith #include "pinclude/plapack.h" 876b6490206SBarry Smith 8775615d1e5SSatish Balay #undef __FUNC__ 8785615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 87939b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 8802593348eSBarry Smith { 881b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 88239b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 883c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 8842593348eSBarry Smith 885c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 886c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 887b6490206SBarry Smith 888119a934aSSatish Balay idx = a->j; 889119a934aSSatish Balay v = a->a; 890119a934aSSatish Balay ii = a->i; 891119a934aSSatish Balay 892119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 893119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 894119a934aSSatish Balay sum = 0.0; 895119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 8961a6a6d98SBarry Smith z[i] = sum; 897119a934aSSatish Balay } 898c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 899c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 90039b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 90139b95ed1SBarry Smith return 0; 90239b95ed1SBarry Smith } 90339b95ed1SBarry Smith 9045615d1e5SSatish Balay #undef __FUNC__ 9055615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 90639b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 90739b95ed1SBarry Smith { 90839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 90939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 91039b95ed1SBarry Smith register Scalar x1,x2; 91139b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 912c16cb8f2SBarry Smith int j,n; 91339b95ed1SBarry Smith 914c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 915c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 91639b95ed1SBarry Smith 91739b95ed1SBarry Smith idx = a->j; 91839b95ed1SBarry Smith v = a->a; 91939b95ed1SBarry Smith ii = a->i; 92039b95ed1SBarry Smith 921119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 922119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 923119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 924119a934aSSatish Balay for ( j=0; j<n; j++ ) { 925119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 926119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 927119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 928119a934aSSatish Balay v += 4; 929119a934aSSatish Balay } 9301a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 931119a934aSSatish Balay z += 2; 932119a934aSSatish Balay } 933c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 934c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 93539b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 93639b95ed1SBarry Smith return 0; 93739b95ed1SBarry Smith } 93839b95ed1SBarry Smith 9395615d1e5SSatish Balay #undef __FUNC__ 9405615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 94139b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 94239b95ed1SBarry Smith { 94339b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 94439b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 945c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 94639b95ed1SBarry Smith 947c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 948c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 94939b95ed1SBarry Smith 95039b95ed1SBarry Smith idx = a->j; 95139b95ed1SBarry Smith v = a->a; 95239b95ed1SBarry Smith ii = a->i; 95339b95ed1SBarry Smith 954119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 955119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 956119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 957119a934aSSatish Balay for ( j=0; j<n; j++ ) { 958119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 959119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 960119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 961119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 962119a934aSSatish Balay v += 9; 963119a934aSSatish Balay } 9641a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 965119a934aSSatish Balay z += 3; 966119a934aSSatish Balay } 967c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 968c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 96939b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 97039b95ed1SBarry Smith return 0; 97139b95ed1SBarry Smith } 97239b95ed1SBarry Smith 9735615d1e5SSatish Balay #undef __FUNC__ 9745615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 97539b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 97639b95ed1SBarry Smith { 97739b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97839b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 97939b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 98039b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 981c16cb8f2SBarry Smith int j,n; 98239b95ed1SBarry Smith 983c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 984c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 98539b95ed1SBarry Smith 98639b95ed1SBarry Smith idx = a->j; 98739b95ed1SBarry Smith v = a->a; 98839b95ed1SBarry Smith ii = a->i; 98939b95ed1SBarry Smith 990119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 991119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 992119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 993119a934aSSatish Balay for ( j=0; j<n; j++ ) { 994119a934aSSatish Balay xb = x + 4*(*idx++); 995119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 996119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 997119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 998119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 999119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1000119a934aSSatish Balay v += 16; 1001119a934aSSatish Balay } 10021a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1003119a934aSSatish Balay z += 4; 1004119a934aSSatish Balay } 1005c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1006c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 100739b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 100839b95ed1SBarry Smith return 0; 100939b95ed1SBarry Smith } 101039b95ed1SBarry Smith 10115615d1e5SSatish Balay #undef __FUNC__ 10125615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 101339b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 101439b95ed1SBarry Smith { 101539b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101639b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 101739b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 1018c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 101939b95ed1SBarry Smith 1020c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1021c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 102239b95ed1SBarry Smith 102339b95ed1SBarry Smith idx = a->j; 102439b95ed1SBarry Smith v = a->a; 102539b95ed1SBarry Smith ii = a->i; 102639b95ed1SBarry Smith 1027119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1028119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1029119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1030119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1031119a934aSSatish Balay xb = x + 5*(*idx++); 1032119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1033119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1034119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1035119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1036119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1037119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1038119a934aSSatish Balay v += 25; 1039119a934aSSatish Balay } 10401a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1041119a934aSSatish Balay z += 5; 1042119a934aSSatish Balay } 1043c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1044c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 104539b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 104639b95ed1SBarry Smith return 0; 104739b95ed1SBarry Smith } 104839b95ed1SBarry Smith 10495615d1e5SSatish Balay #undef __FUNC__ 105048e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7" 105148e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 105248e9ddb2SSatish Balay { 105348e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 105448e9ddb2SSatish Balay register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 105548e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 105648e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 105748e9ddb2SSatish Balay 105848e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 105948e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 106048e9ddb2SSatish Balay 106148e9ddb2SSatish Balay idx = a->j; 106248e9ddb2SSatish Balay v = a->a; 106348e9ddb2SSatish Balay ii = a->i; 106448e9ddb2SSatish Balay 106548e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 106648e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 106748e9ddb2SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 106848e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 106948e9ddb2SSatish Balay xb = x + 7*(*idx++); 107048e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 107148e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 107248e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 107348e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 107448e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 107548e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 107648e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 107748e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 107848e9ddb2SSatish Balay v += 49; 107948e9ddb2SSatish Balay } 108048e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 108148e9ddb2SSatish Balay z += 7; 108248e9ddb2SSatish Balay } 108348e9ddb2SSatish Balay 108448e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 108548e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 108648e9ddb2SSatish Balay PLogFlops(98*a->nz - a->m); 108748e9ddb2SSatish Balay return 0; 108848e9ddb2SSatish Balay } 108948e9ddb2SSatish Balay 109048e9ddb2SSatish Balay #undef __FUNC__ 10915615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 109239b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 109339b95ed1SBarry Smith { 109439b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 109539b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 1096c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1097c16cb8f2SBarry Smith int _One = 1,ncols,k; 1098c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 109939b95ed1SBarry Smith 1100c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1101c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 110239b95ed1SBarry Smith 110339b95ed1SBarry Smith idx = a->j; 110439b95ed1SBarry Smith v = a->a; 110539b95ed1SBarry Smith ii = a->i; 110639b95ed1SBarry Smith 110739b95ed1SBarry Smith 1108119a934aSSatish Balay if (!a->mult_work) { 11093b547af2SSatish Balay k = PetscMax(a->m,a->n); 11102b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1111119a934aSSatish Balay } 1112119a934aSSatish Balay work = a->mult_work; 1113119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1114119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1115119a934aSSatish Balay ncols = n*bs; 1116119a934aSSatish Balay workt = work; 1117119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1118119a934aSSatish Balay xb = x + bs*(*idx++); 1119119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1120119a934aSSatish Balay workt += bs; 1121119a934aSSatish Balay } 11221a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1123119a934aSSatish Balay v += n*bs2; 1124119a934aSSatish Balay z += bs; 1125119a934aSSatish Balay } 1126c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1127c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 11281a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 1129bb42667fSSatish Balay return 0; 1130bb42667fSSatish Balay } 1131bb42667fSSatish Balay 11325615d1e5SSatish Balay #undef __FUNC__ 11335615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1134f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1135f44d3a6dSSatish Balay { 1136f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1137f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 1138c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 1139f44d3a6dSSatish Balay 1140c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1141c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1142c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1143f44d3a6dSSatish Balay 1144f44d3a6dSSatish Balay idx = a->j; 1145f44d3a6dSSatish Balay v = a->a; 1146f44d3a6dSSatish Balay ii = a->i; 1147f44d3a6dSSatish Balay 1148f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1149f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1150f44d3a6dSSatish Balay sum = y[i]; 1151f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 1152f44d3a6dSSatish Balay z[i] = sum; 1153f44d3a6dSSatish Balay } 1154c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1155c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1156c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1157f44d3a6dSSatish Balay PLogFlops(2*a->nz); 1158f44d3a6dSSatish Balay return 0; 1159f44d3a6dSSatish Balay } 1160f44d3a6dSSatish Balay 11615615d1e5SSatish Balay #undef __FUNC__ 11625615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1163f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1164f44d3a6dSSatish Balay { 1165f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1166f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1167f44d3a6dSSatish Balay register Scalar x1,x2; 1168f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1169c16cb8f2SBarry Smith int j,n; 1170f44d3a6dSSatish Balay 1171c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1172c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1173c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1174f44d3a6dSSatish Balay 1175f44d3a6dSSatish Balay idx = a->j; 1176f44d3a6dSSatish Balay v = a->a; 1177f44d3a6dSSatish Balay ii = a->i; 1178f44d3a6dSSatish Balay 1179f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1180f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1181f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 1182f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1183f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1184f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 1185f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 1186f44d3a6dSSatish Balay v += 4; 1187f44d3a6dSSatish Balay } 1188f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 1189f44d3a6dSSatish Balay z += 2; y += 2; 1190f44d3a6dSSatish Balay } 1191c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1192c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1193c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1194f44d3a6dSSatish Balay PLogFlops(4*a->nz); 1195f44d3a6dSSatish Balay return 0; 1196f44d3a6dSSatish Balay } 1197f44d3a6dSSatish Balay 11985615d1e5SSatish Balay #undef __FUNC__ 11995615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1200f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1201f44d3a6dSSatish Balay { 1202f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1203f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1204c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1205f44d3a6dSSatish Balay 1206c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1207c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1208c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1209f44d3a6dSSatish Balay 1210f44d3a6dSSatish Balay idx = a->j; 1211f44d3a6dSSatish Balay v = a->a; 1212f44d3a6dSSatish Balay ii = a->i; 1213f44d3a6dSSatish Balay 1214f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1215f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1216f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1217f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1218f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1219f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1220f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1221f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1222f44d3a6dSSatish Balay v += 9; 1223f44d3a6dSSatish Balay } 1224f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1225f44d3a6dSSatish Balay z += 3; y += 3; 1226f44d3a6dSSatish Balay } 1227c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1228c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1229c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1230f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1231f44d3a6dSSatish Balay return 0; 1232f44d3a6dSSatish Balay } 1233f44d3a6dSSatish Balay 12345615d1e5SSatish Balay #undef __FUNC__ 12355615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1236f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1237f44d3a6dSSatish Balay { 1238f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1239f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1240f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 1241f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1242c16cb8f2SBarry Smith int j,n; 1243f44d3a6dSSatish Balay 1244c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1245c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1246c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1247f44d3a6dSSatish Balay 1248f44d3a6dSSatish Balay idx = a->j; 1249f44d3a6dSSatish Balay v = a->a; 1250f44d3a6dSSatish Balay ii = a->i; 1251f44d3a6dSSatish Balay 1252f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1253f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1254f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1255f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1256f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1257f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1258f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1259f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1260f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1261f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1262f44d3a6dSSatish Balay v += 16; 1263f44d3a6dSSatish Balay } 1264f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1265f44d3a6dSSatish Balay z += 4; y += 4; 1266f44d3a6dSSatish Balay } 1267c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1268c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1269c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1270f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1271f44d3a6dSSatish Balay return 0; 1272f44d3a6dSSatish Balay } 1273f44d3a6dSSatish Balay 12745615d1e5SSatish Balay #undef __FUNC__ 12755615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1276f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1277f44d3a6dSSatish Balay { 1278f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1279f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1280f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1281c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1282f44d3a6dSSatish Balay 1283c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1284c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1285c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1286f44d3a6dSSatish Balay 1287f44d3a6dSSatish Balay idx = a->j; 1288f44d3a6dSSatish Balay v = a->a; 1289f44d3a6dSSatish Balay ii = a->i; 1290f44d3a6dSSatish Balay 1291f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1292f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1293f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1294f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1295f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1296f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1297f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1298f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1299f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1300f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1301f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1302f44d3a6dSSatish Balay v += 25; 1303f44d3a6dSSatish Balay } 1304f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1305f44d3a6dSSatish Balay z += 5; y += 5; 1306f44d3a6dSSatish Balay } 1307c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1308c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1309c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1310f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1311f44d3a6dSSatish Balay return 0; 1312f44d3a6dSSatish Balay } 1313f44d3a6dSSatish Balay 13145615d1e5SSatish Balay #undef __FUNC__ 131548e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 131648e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 131748e9ddb2SSatish Balay { 131848e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 131948e9ddb2SSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 132048e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 132148e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 132248e9ddb2SSatish Balay 132348e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 132448e9ddb2SSatish Balay VecGetArray_Fast(yy,y); 132548e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 132648e9ddb2SSatish Balay 132748e9ddb2SSatish Balay idx = a->j; 132848e9ddb2SSatish Balay v = a->a; 132948e9ddb2SSatish Balay ii = a->i; 133048e9ddb2SSatish Balay 133148e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 133248e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 133348e9ddb2SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 133448e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 133548e9ddb2SSatish Balay xb = x + 7*(*idx++); 133648e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 133748e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 133848e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 133948e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 134048e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 134148e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 134248e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 134348e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 134448e9ddb2SSatish Balay v += 49; 134548e9ddb2SSatish Balay } 134648e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 134748e9ddb2SSatish Balay z += 7; y += 7; 134848e9ddb2SSatish Balay } 134948e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 135048e9ddb2SSatish Balay VecRestoreArray_Fast(yy,y); 135148e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 135248e9ddb2SSatish Balay PLogFlops(98*a->nz); 135348e9ddb2SSatish Balay return 0; 135448e9ddb2SSatish Balay } 135548e9ddb2SSatish Balay 135648e9ddb2SSatish Balay 135748e9ddb2SSatish Balay #undef __FUNC__ 13585615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1359f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1360f44d3a6dSSatish Balay { 1361f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1362f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1363f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1364f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1365f44d3a6dSSatish Balay 1366f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1367f44d3a6dSSatish Balay 1368c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1369c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1370f44d3a6dSSatish Balay 1371f44d3a6dSSatish Balay idx = a->j; 1372f44d3a6dSSatish Balay v = a->a; 1373f44d3a6dSSatish Balay ii = a->i; 1374f44d3a6dSSatish Balay 1375f44d3a6dSSatish Balay 1376f44d3a6dSSatish Balay if (!a->mult_work) { 1377f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1378f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1379f44d3a6dSSatish Balay } 1380f44d3a6dSSatish Balay work = a->mult_work; 1381f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1382f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1383f44d3a6dSSatish Balay ncols = n*bs; 1384f44d3a6dSSatish Balay workt = work; 1385f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1386f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1387f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1388f44d3a6dSSatish Balay workt += bs; 1389f44d3a6dSSatish Balay } 1390f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1391f44d3a6dSSatish Balay v += n*bs2; 1392f44d3a6dSSatish Balay z += bs; 1393f44d3a6dSSatish Balay } 1394c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1395c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1396f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1397f44d3a6dSSatish Balay return 0; 1398f44d3a6dSSatish Balay } 1399f44d3a6dSSatish Balay 14005615d1e5SSatish Balay #undef __FUNC__ 14015615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 14028f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1403bb42667fSSatish Balay { 1404bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14051a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1406bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1407bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 14089df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1409119a934aSSatish Balay 1410119a934aSSatish Balay 141190f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 141290f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1413bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1414bb42667fSSatish Balay 1415119a934aSSatish Balay idx = a->j; 1416119a934aSSatish Balay v = a->a; 1417119a934aSSatish Balay ii = a->i; 1418119a934aSSatish Balay 1419119a934aSSatish Balay switch (bs) { 1420119a934aSSatish Balay case 1: 1421119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1422119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1423119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1424119a934aSSatish Balay ib = idx + ai[i]; 1425119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1426bb1453f0SSatish Balay rval = ib[j]; 1427bb1453f0SSatish Balay z[rval] += *v++ * x1; 1428119a934aSSatish Balay } 1429119a934aSSatish Balay } 1430119a934aSSatish Balay break; 1431119a934aSSatish Balay case 2: 1432119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1433119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1434119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1435119a934aSSatish Balay ib = idx + ai[i]; 1436119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1437119a934aSSatish Balay rval = ib[j]*2; 1438bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1439bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1440119a934aSSatish Balay v += 4; 1441119a934aSSatish Balay } 1442119a934aSSatish Balay } 1443119a934aSSatish Balay break; 1444119a934aSSatish Balay case 3: 1445119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1446119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1447119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1448119a934aSSatish Balay ib = idx + ai[i]; 1449119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1450119a934aSSatish Balay rval = ib[j]*3; 1451bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1452bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1453bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1454119a934aSSatish Balay v += 9; 1455119a934aSSatish Balay } 1456119a934aSSatish Balay } 1457119a934aSSatish Balay break; 1458119a934aSSatish Balay case 4: 1459119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1460119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1461119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1462119a934aSSatish Balay ib = idx + ai[i]; 1463119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1464119a934aSSatish Balay rval = ib[j]*4; 1465bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1466bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1467bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1468bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1469119a934aSSatish Balay v += 16; 1470119a934aSSatish Balay } 1471119a934aSSatish Balay } 1472119a934aSSatish Balay break; 1473119a934aSSatish Balay case 5: 1474119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1475119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1476119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1477119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1478119a934aSSatish Balay ib = idx + ai[i]; 1479119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1480119a934aSSatish Balay rval = ib[j]*5; 1481bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1482bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1483bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1484bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1485bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1486119a934aSSatish Balay v += 25; 1487119a934aSSatish Balay } 1488119a934aSSatish Balay } 1489119a934aSSatish Balay break; 1490119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1491119a934aSSatish Balay default: { 1492119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1493119a934aSSatish Balay if (!a->mult_work) { 14943b547af2SSatish Balay k = PetscMax(a->m,a->n); 1495bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1496119a934aSSatish Balay CHKPTRQ(a->mult_work); 1497119a934aSSatish Balay } 1498119a934aSSatish Balay work = a->mult_work; 1499119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1500119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1501119a934aSSatish Balay ncols = n*bs; 1502119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1503119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1504119a934aSSatish Balay v += n*bs2; 1505119a934aSSatish Balay x += bs; 1506119a934aSSatish Balay workt = work; 1507119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1508119a934aSSatish Balay zb = z + bs*(*idx++); 1509bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1510119a934aSSatish Balay workt += bs; 1511119a934aSSatish Balay } 1512119a934aSSatish Balay } 1513119a934aSSatish Balay } 1514119a934aSSatish Balay } 15150419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 15160419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1517faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1518faf6580fSSatish Balay return 0; 1519faf6580fSSatish Balay } 15201c351548SSatish Balay 15215615d1e5SSatish Balay #undef __FUNC__ 15225615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 15238f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1524faf6580fSSatish Balay { 1525faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1526faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1527faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1528faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1529faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1530faf6580fSSatish Balay 1531faf6580fSSatish Balay 1532faf6580fSSatish Balay 153390f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 153490f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1535faf6580fSSatish Balay 1536648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1537648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1538648c1d95SSatish Balay 1539faf6580fSSatish Balay 1540faf6580fSSatish Balay idx = a->j; 1541faf6580fSSatish Balay v = a->a; 1542faf6580fSSatish Balay ii = a->i; 1543faf6580fSSatish Balay 1544faf6580fSSatish Balay switch (bs) { 1545faf6580fSSatish Balay case 1: 1546faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1547faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1548faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1549faf6580fSSatish Balay ib = idx + ai[i]; 1550faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1551faf6580fSSatish Balay rval = ib[j]; 1552faf6580fSSatish Balay z[rval] += *v++ * x1; 1553faf6580fSSatish Balay } 1554faf6580fSSatish Balay } 1555faf6580fSSatish Balay break; 1556faf6580fSSatish Balay case 2: 1557faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1558faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1559faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1560faf6580fSSatish Balay ib = idx + ai[i]; 1561faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1562faf6580fSSatish Balay rval = ib[j]*2; 1563faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1564faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1565faf6580fSSatish Balay v += 4; 1566faf6580fSSatish Balay } 1567faf6580fSSatish Balay } 1568faf6580fSSatish Balay break; 1569faf6580fSSatish Balay case 3: 1570faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1571faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1572faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1573faf6580fSSatish Balay ib = idx + ai[i]; 1574faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1575faf6580fSSatish Balay rval = ib[j]*3; 1576faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1577faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1578faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1579faf6580fSSatish Balay v += 9; 1580faf6580fSSatish Balay } 1581faf6580fSSatish Balay } 1582faf6580fSSatish Balay break; 1583faf6580fSSatish Balay case 4: 1584faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1585faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1586faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1587faf6580fSSatish Balay ib = idx + ai[i]; 1588faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1589faf6580fSSatish Balay rval = ib[j]*4; 1590faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1591faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1592faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1593faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1594faf6580fSSatish Balay v += 16; 1595faf6580fSSatish Balay } 1596faf6580fSSatish Balay } 1597faf6580fSSatish Balay break; 1598faf6580fSSatish Balay case 5: 1599faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1600faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1601faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1602faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1603faf6580fSSatish Balay ib = idx + ai[i]; 1604faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1605faf6580fSSatish Balay rval = ib[j]*5; 1606faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1607faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1608faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1609faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1610faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1611faf6580fSSatish Balay v += 25; 1612faf6580fSSatish Balay } 1613faf6580fSSatish Balay } 1614faf6580fSSatish Balay break; 1615faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1616faf6580fSSatish Balay default: { 1617faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1618faf6580fSSatish Balay if (!a->mult_work) { 1619faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1620faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1621faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1622faf6580fSSatish Balay } 1623faf6580fSSatish Balay work = a->mult_work; 1624faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1625faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1626faf6580fSSatish Balay ncols = n*bs; 1627faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1628faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1629faf6580fSSatish Balay v += n*bs2; 1630faf6580fSSatish Balay x += bs; 1631faf6580fSSatish Balay workt = work; 1632faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1633faf6580fSSatish Balay zb = z + bs*(*idx++); 1634faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1635faf6580fSSatish Balay workt += bs; 1636faf6580fSSatish Balay } 1637faf6580fSSatish Balay } 1638faf6580fSSatish Balay } 1639faf6580fSSatish Balay } 1640faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1641faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1642faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 16432593348eSBarry Smith return 0; 16442593348eSBarry Smith } 16452593348eSBarry Smith 16465615d1e5SSatish Balay #undef __FUNC__ 1647d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" 16488f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 16492593348eSBarry Smith { 1650b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16514e220ebcSLois Curfman McInnes 16524e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 16534e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 16544e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 16554e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 16564e220ebcSLois Curfman McInnes info->block_size = a->bs2; 16574e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 16584e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 16594e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 16604e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 16614e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 16624e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 16634e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 16644e220ebcSLois Curfman McInnes info->memory = A->mem; 16654e220ebcSLois Curfman McInnes if (A->factor) { 16664e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 16674e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 16684e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 16694e220ebcSLois Curfman McInnes } else { 16704e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 16714e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16724e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16734e220ebcSLois Curfman McInnes } 16742593348eSBarry Smith return 0; 16752593348eSBarry Smith } 16762593348eSBarry Smith 16775615d1e5SSatish Balay #undef __FUNC__ 16785615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 16798f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 168091d316f6SSatish Balay { 168191d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 168291d316f6SSatish Balay 1683e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 168491d316f6SSatish Balay 168591d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 168691d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1687a7c10996SSatish Balay (a->nz != b->nz)) { 168891d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 168991d316f6SSatish Balay } 169091d316f6SSatish Balay 169191d316f6SSatish Balay /* if the a->i are the same */ 169291d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 169391d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169491d316f6SSatish Balay } 169591d316f6SSatish Balay 169691d316f6SSatish Balay /* if a->j are the same */ 169791d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 169891d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169991d316f6SSatish Balay } 170091d316f6SSatish Balay 170191d316f6SSatish Balay /* if a->a are the same */ 170291d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 170391d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 170491d316f6SSatish Balay } 170591d316f6SSatish Balay *flg = PETSC_TRUE; 170691d316f6SSatish Balay return 0; 170791d316f6SSatish Balay 170891d316f6SSatish Balay } 170991d316f6SSatish Balay 17105615d1e5SSatish Balay #undef __FUNC__ 17115615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 17128f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 171391d316f6SSatish Balay { 171491d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17157e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 171617e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 171717e48fc4SSatish Balay 17180513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 171917e48fc4SSatish Balay bs = a->bs; 172017e48fc4SSatish Balay aa = a->a; 172117e48fc4SSatish Balay ai = a->i; 172217e48fc4SSatish Balay aj = a->j; 172317e48fc4SSatish Balay ambs = a->mbs; 17249df24120SSatish Balay bs2 = a->bs2; 172591d316f6SSatish Balay 172691d316f6SSatish Balay VecSet(&zero,v); 172790f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1728e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 172917e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 173017e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 173117e48fc4SSatish Balay if (aj[j] == i) { 173217e48fc4SSatish Balay row = i*bs; 17337e67e3f9SSatish Balay aa_j = aa+j*bs2; 17347e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 173591d316f6SSatish Balay break; 173691d316f6SSatish Balay } 173791d316f6SSatish Balay } 173891d316f6SSatish Balay } 173991d316f6SSatish Balay return 0; 174091d316f6SSatish Balay } 174191d316f6SSatish Balay 17425615d1e5SSatish Balay #undef __FUNC__ 17435615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 17448f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 17459867e207SSatish Balay { 17469867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17479867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 17487e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 17499867e207SSatish Balay 17509867e207SSatish Balay ai = a->i; 17519867e207SSatish Balay aj = a->j; 17529867e207SSatish Balay aa = a->a; 17539867e207SSatish Balay m = a->m; 17549867e207SSatish Balay n = a->n; 17559867e207SSatish Balay bs = a->bs; 17569867e207SSatish Balay mbs = a->mbs; 17579df24120SSatish Balay bs2 = a->bs2; 17589867e207SSatish Balay if (ll) { 175990f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1760e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 17619867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17629867e207SSatish Balay M = ai[i+1] - ai[i]; 17639867e207SSatish Balay li = l + i*bs; 17647e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17659867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17667e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 17679867e207SSatish Balay (*v++) *= li[k%bs]; 17689867e207SSatish Balay } 17699867e207SSatish Balay } 17709867e207SSatish Balay } 17719867e207SSatish Balay } 17729867e207SSatish Balay 17739867e207SSatish Balay if (rr) { 177490f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1775e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 17769867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17779867e207SSatish Balay M = ai[i+1] - ai[i]; 17787e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17799867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17809867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 17819867e207SSatish Balay for ( k=0; k<bs; k++ ) { 17829867e207SSatish Balay x = ri[k]; 17839867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 17849867e207SSatish Balay } 17859867e207SSatish Balay } 17869867e207SSatish Balay } 17879867e207SSatish Balay } 17889867e207SSatish Balay return 0; 17899867e207SSatish Balay } 17909867e207SSatish Balay 17919867e207SSatish Balay 1792b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1793b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 17942a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1795736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1796736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 17971a6a6d98SBarry Smith 17981a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 18031a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 180448e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 18051a6a6d98SBarry Smith 18067fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 18117fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 18122593348eSBarry Smith 18135615d1e5SSatish Balay #undef __FUNC__ 18145615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 18158f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 18162593348eSBarry Smith { 1817b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18182593348eSBarry Smith Scalar *v = a->a; 18192593348eSBarry Smith double sum = 0.0; 18209df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 18212593348eSBarry Smith 18222593348eSBarry Smith if (type == NORM_FROBENIUS) { 18230419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 18242593348eSBarry Smith #if defined(PETSC_COMPLEX) 18252593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 18262593348eSBarry Smith #else 18272593348eSBarry Smith sum += (*v)*(*v); v++; 18282593348eSBarry Smith #endif 18292593348eSBarry Smith } 18302593348eSBarry Smith *norm = sqrt(sum); 18312593348eSBarry Smith } 18322593348eSBarry Smith else { 1833e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 18342593348eSBarry Smith } 18352593348eSBarry Smith return 0; 18362593348eSBarry Smith } 18372593348eSBarry Smith 18383eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 18392593348eSBarry Smith /* 18402593348eSBarry Smith note: This can only work for identity for row and col. It would 18412593348eSBarry Smith be good to check this and otherwise generate an error. 18422593348eSBarry Smith */ 18435615d1e5SSatish Balay #undef __FUNC__ 18445615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 18458f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 18462593348eSBarry Smith { 1847b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18482593348eSBarry Smith Mat outA; 1849de6a44a3SBarry Smith int ierr; 18502593348eSBarry Smith 1851e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 18522593348eSBarry Smith 18532593348eSBarry Smith outA = inA; 18542593348eSBarry Smith inA->factor = FACTOR_LU; 18552593348eSBarry Smith a->row = row; 18562593348eSBarry Smith a->col = col; 18572593348eSBarry Smith 1858eed86810SBarry Smith if (!a->solve_work) { 1859de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1860eed86810SBarry Smith PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1861eed86810SBarry Smith } 18622593348eSBarry Smith 18632593348eSBarry Smith if (!a->diag) { 1864b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 18652593348eSBarry Smith } 18667fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 18673eee16b0SBarry Smith 18683eee16b0SBarry Smith /* 18693eee16b0SBarry Smith Blocksize 4 has a special faster solver for ILU(0) factorization 18703eee16b0SBarry Smith with natural ordering 18713eee16b0SBarry Smith */ 18723eee16b0SBarry Smith if (a->bs == 4) { 18733eee16b0SBarry Smith inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 18743eee16b0SBarry Smith } 18753eee16b0SBarry Smith 18762593348eSBarry Smith return 0; 18772593348eSBarry Smith } 18782593348eSBarry Smith 18795615d1e5SSatish Balay #undef __FUNC__ 18805615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 18818f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 18822593348eSBarry Smith { 1883b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18849df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1885b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1886b6490206SBarry Smith PLogFlops(totalnz); 18872593348eSBarry Smith return 0; 18882593348eSBarry Smith } 18892593348eSBarry Smith 18905615d1e5SSatish Balay #undef __FUNC__ 18915615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 18928f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 18937e67e3f9SSatish Balay { 18947e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18957e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1896a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 18979df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 18987e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 18997e67e3f9SSatish Balay 19007e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 19017e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1902e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1903e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1904a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 19057e67e3f9SSatish Balay nrow = ailen[brow]; 19067e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1907e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1908e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1909a7c10996SSatish Balay col = in[l] ; 19107e67e3f9SSatish Balay bcol = col/bs; 19117e67e3f9SSatish Balay cidx = col%bs; 19127e67e3f9SSatish Balay ridx = row%bs; 19137e67e3f9SSatish Balay high = nrow; 19147e67e3f9SSatish Balay low = 0; /* assume unsorted */ 19157e67e3f9SSatish Balay while (high-low > 5) { 19167e67e3f9SSatish Balay t = (low+high)/2; 19177e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 19187e67e3f9SSatish Balay else low = t; 19197e67e3f9SSatish Balay } 19207e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 19217e67e3f9SSatish Balay if (rp[i] > bcol) break; 19227e67e3f9SSatish Balay if (rp[i] == bcol) { 19237e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 19247e67e3f9SSatish Balay goto finished; 19257e67e3f9SSatish Balay } 19267e67e3f9SSatish Balay } 19277e67e3f9SSatish Balay *v++ = zero; 19287e67e3f9SSatish Balay finished:; 19297e67e3f9SSatish Balay } 19307e67e3f9SSatish Balay } 19317e67e3f9SSatish Balay return 0; 19327e67e3f9SSatish Balay } 19337e67e3f9SSatish Balay 19345615d1e5SSatish Balay #undef __FUNC__ 1935d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 19368f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 19375a838052SSatish Balay { 19385a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 19395a838052SSatish Balay *bs = baij->bs; 19405a838052SSatish Balay return 0; 19415a838052SSatish Balay } 19425a838052SSatish Balay 1943d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 19445615d1e5SSatish Balay #undef __FUNC__ 19455615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1946d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1947d9b7c43dSSatish Balay { 1948d9b7c43dSSatish Balay int i,row; 1949d9b7c43dSSatish Balay row = idx[0]; 1950d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1951d9b7c43dSSatish Balay 1952d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1953d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1954d9b7c43dSSatish Balay } 1955d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1956d9b7c43dSSatish Balay return 0; 1957d9b7c43dSSatish Balay } 1958d9b7c43dSSatish Balay 19595615d1e5SSatish Balay #undef __FUNC__ 19605615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 19618f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1962d9b7c43dSSatish Balay { 1963d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1964d9b7c43dSSatish Balay IS is_local; 1965d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1966d9b7c43dSSatish Balay PetscTruth flg; 1967d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1968d9b7c43dSSatish Balay 1969d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1970d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1971d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1972537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1973d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1974d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1975d9b7c43dSSatish Balay 1976d9b7c43dSSatish Balay i = 0; 1977d9b7c43dSSatish Balay while (i < is_n) { 1978e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1979d9b7c43dSSatish Balay flg = PETSC_FALSE; 1980d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1981d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1982d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1983d9b7c43dSSatish Balay i += bs; 1984d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1985d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1986d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1987d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1988d9b7c43dSSatish Balay aa[0] = zero; 1989d9b7c43dSSatish Balay aa+=bs; 1990d9b7c43dSSatish Balay } 1991d9b7c43dSSatish Balay i++; 1992d9b7c43dSSatish Balay } 1993d9b7c43dSSatish Balay } 1994d9b7c43dSSatish Balay if (diag) { 1995d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1996d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1997d9b7c43dSSatish Balay } 1998d9b7c43dSSatish Balay } 1999d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2000d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2001d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 20029a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2003d9b7c43dSSatish Balay 2004d9b7c43dSSatish Balay return 0; 2005d9b7c43dSSatish Balay } 20061c351548SSatish Balay 20075615d1e5SSatish Balay #undef __FUNC__ 2008d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2009ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 2010ba4ca20aSSatish Balay { 2011ba4ca20aSSatish Balay static int called = 0; 2012ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 2013ba4ca20aSSatish Balay 2014ba4ca20aSSatish Balay if (called) return 0; else called = 1; 2015ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2016ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 2017ba4ca20aSSatish Balay return 0; 2018ba4ca20aSSatish Balay } 2019d9b7c43dSSatish Balay 20202593348eSBarry Smith /* -------------------------------------------------------------------*/ 2021cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 20229867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2023f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2024faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 20251a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 2026b6490206SBarry Smith 0,0, 2027de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 2028b6490206SBarry Smith 0, 2029f2501298SSatish Balay MatTranspose_SeqBAIJ, 203017e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 20319867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2032584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 2033b6490206SBarry Smith 0, 2034d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 20357fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2036b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2037de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 2038d402145bSBarry Smith 0,0, 2039b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 2040b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 2041af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 20427e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 2043ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 20443b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 20453b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 204692c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 204792c4ed94SBarry Smith 0,0,0,0,0,0, 204892c4ed94SBarry Smith MatSetValuesBlocked_SeqBAIJ}; 20492593348eSBarry Smith 20505615d1e5SSatish Balay #undef __FUNC__ 20515615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 20522593348eSBarry Smith /*@C 205344cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 205444cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 205544cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 20562bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 20572bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 20582593348eSBarry Smith 20592593348eSBarry Smith Input Parameters: 2060029af93fSBarry Smith . comm - MPI communicator, set to PETSC_COMM_SELF 2061b6490206SBarry Smith . bs - size of block 20622593348eSBarry Smith . m - number of rows 20632593348eSBarry Smith . n - number of columns 2064b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 20652bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 20662bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 20672593348eSBarry Smith 20682593348eSBarry Smith Output Parameter: 20692593348eSBarry Smith . A - the matrix 20702593348eSBarry Smith 20710513a670SBarry Smith Options Database Keys: 20720513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 20730effe34fSLois Curfman McInnes $ block calculations (much slower) 20740513a670SBarry Smith $ -mat_block_size - size of the blocks to use 20750513a670SBarry Smith 20762593348eSBarry Smith Notes: 207744cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 20782593348eSBarry Smith storage. That is, the stored row and column indices can begin at 207944cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 20802593348eSBarry Smith 20812593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 20822593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 20832593348eSBarry Smith allocation. For additional details, see the users manual chapter on 20846da5968aSLois Curfman McInnes matrices. 20852593348eSBarry Smith 208644cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 20872593348eSBarry Smith @*/ 2088b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 20892593348eSBarry Smith { 20902593348eSBarry Smith Mat B; 2091b6490206SBarry Smith Mat_SeqBAIJ *b; 20923b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 20933b2fbd54SBarry Smith 20943b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 2095e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2096b6490206SBarry Smith 20970513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 20980513a670SBarry Smith 2099f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 2100e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 21012593348eSBarry Smith 21022593348eSBarry Smith *A = 0; 2103d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 21042593348eSBarry Smith PLogObjectCreate(B); 2105b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 210644cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 21072593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 21081a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 21091a6a6d98SBarry Smith if (!flg) { 21107fc0212eSBarry Smith switch (bs) { 21117fc0212eSBarry Smith case 1: 21127fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21131a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 211439b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 2115f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 21167fc0212eSBarry Smith break; 21174eeb42bcSBarry Smith case 2: 21184eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 21191a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 212039b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 2121f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 21226d84be18SBarry Smith break; 2123f327dd97SBarry Smith case 3: 2124f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 21251a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 212639b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 2127f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 21284eeb42bcSBarry Smith break; 21296d84be18SBarry Smith case 4: 21306d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 21311a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 213239b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 2133f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 21346d84be18SBarry Smith break; 21356d84be18SBarry Smith case 5: 21366d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 21371a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 213839b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 2139f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 21406d84be18SBarry Smith break; 214148e9ddb2SSatish Balay case 7: 214248e9ddb2SSatish Balay B->ops.mult = MatMult_SeqBAIJ_7; 214348e9ddb2SSatish Balay B->ops.solve = MatSolve_SeqBAIJ_7; 214448e9ddb2SSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_7; 214548e9ddb2SSatish Balay break; 21467fc0212eSBarry Smith } 21471a6a6d98SBarry Smith } 2148b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 2149b6490206SBarry Smith B->view = MatView_SeqBAIJ; 21502593348eSBarry Smith B->factor = 0; 21512593348eSBarry Smith B->lupivotthreshold = 1.0; 215290f02eecSBarry Smith B->mapping = 0; 21532593348eSBarry Smith b->row = 0; 21542593348eSBarry Smith b->col = 0; 21552593348eSBarry Smith b->reallocs = 0; 21562593348eSBarry Smith 215744cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 215844cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 2159b6490206SBarry Smith b->mbs = mbs; 2160f2501298SSatish Balay b->nbs = nbs; 2161b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 21622593348eSBarry Smith if (nnz == PETSC_NULL) { 2163119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 21642593348eSBarry Smith else if (nz <= 0) nz = 1; 2165b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2166b6490206SBarry Smith nz = nz*mbs; 21672593348eSBarry Smith } 21682593348eSBarry Smith else { 21692593348eSBarry Smith nz = 0; 2170b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 21712593348eSBarry Smith } 21722593348eSBarry Smith 21732593348eSBarry Smith /* allocate the matrix space */ 21747e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 21752593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 21767e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 21777e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 21782593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 21792593348eSBarry Smith b->i = b->j + nz; 21802593348eSBarry Smith b->singlemalloc = 1; 21812593348eSBarry Smith 2182de6a44a3SBarry Smith b->i[0] = 0; 2183b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 21842593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 21852593348eSBarry Smith } 21862593348eSBarry Smith 2187b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2188b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2189f09e8eb9SSatish Balay PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2190b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 21912593348eSBarry Smith 2192b6490206SBarry Smith b->bs = bs; 21939df24120SSatish Balay b->bs2 = bs2; 2194b6490206SBarry Smith b->mbs = mbs; 21952593348eSBarry Smith b->nz = 0; 219618e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 21972593348eSBarry Smith b->sorted = 0; 21982593348eSBarry Smith b->roworiented = 1; 21992593348eSBarry Smith b->nonew = 0; 22002593348eSBarry Smith b->diag = 0; 22012593348eSBarry Smith b->solve_work = 0; 2202de6a44a3SBarry Smith b->mult_work = 0; 22032593348eSBarry Smith b->spptr = 0; 22044e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 22054e220ebcSLois Curfman McInnes 22062593348eSBarry Smith *A = B; 22072593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 22082593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 22092593348eSBarry Smith return 0; 22102593348eSBarry Smith } 22112593348eSBarry Smith 22125615d1e5SSatish Balay #undef __FUNC__ 22135615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2214b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 22152593348eSBarry Smith { 22162593348eSBarry Smith Mat C; 2217b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 22189df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2219de6a44a3SBarry Smith 2220e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 22212593348eSBarry Smith 22222593348eSBarry Smith *B = 0; 2223d4bb536fSBarry Smith PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 22242593348eSBarry Smith PLogObjectCreate(C); 2225b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 22262593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2227b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 2228b6490206SBarry Smith C->view = MatView_SeqBAIJ; 22292593348eSBarry Smith C->factor = A->factor; 22302593348eSBarry Smith c->row = 0; 22312593348eSBarry Smith c->col = 0; 22322593348eSBarry Smith C->assembled = PETSC_TRUE; 22332593348eSBarry Smith 223444cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 223544cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 223644cd7ae7SLois Curfman McInnes C->M = a->m; 223744cd7ae7SLois Curfman McInnes C->N = a->n; 223844cd7ae7SLois Curfman McInnes 2239b6490206SBarry Smith c->bs = a->bs; 22409df24120SSatish Balay c->bs2 = a->bs2; 2241b6490206SBarry Smith c->mbs = a->mbs; 2242b6490206SBarry Smith c->nbs = a->nbs; 22432593348eSBarry Smith 2244b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2245b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2246b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22472593348eSBarry Smith c->imax[i] = a->imax[i]; 22482593348eSBarry Smith c->ilen[i] = a->ilen[i]; 22492593348eSBarry Smith } 22502593348eSBarry Smith 22512593348eSBarry Smith /* allocate the matrix space */ 22522593348eSBarry Smith c->singlemalloc = 1; 22537e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 22542593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 22557e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 2256de6a44a3SBarry Smith c->i = c->j + nz; 2257b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2258b6490206SBarry Smith if (mbs > 0) { 2259de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 22602593348eSBarry Smith if (cpvalues == COPY_VALUES) { 22617e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 22622593348eSBarry Smith } 22632593348eSBarry Smith } 22642593348eSBarry Smith 2265f09e8eb9SSatish Balay PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 22662593348eSBarry Smith c->sorted = a->sorted; 22672593348eSBarry Smith c->roworiented = a->roworiented; 22682593348eSBarry Smith c->nonew = a->nonew; 22692593348eSBarry Smith 22702593348eSBarry Smith if (a->diag) { 2271b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2272b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2273b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22742593348eSBarry Smith c->diag[i] = a->diag[i]; 22752593348eSBarry Smith } 22762593348eSBarry Smith } 22772593348eSBarry Smith else c->diag = 0; 22782593348eSBarry Smith c->nz = a->nz; 22792593348eSBarry Smith c->maxnz = a->maxnz; 22802593348eSBarry Smith c->solve_work = 0; 22812593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 22827fc0212eSBarry Smith c->mult_work = 0; 22832593348eSBarry Smith *B = C; 22842593348eSBarry Smith return 0; 22852593348eSBarry Smith } 22862593348eSBarry Smith 22875615d1e5SSatish Balay #undef __FUNC__ 22885615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 228919bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 22902593348eSBarry Smith { 2291b6490206SBarry Smith Mat_SeqBAIJ *a; 22922593348eSBarry Smith Mat B; 2293de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2294b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 229535aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2296a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2297b6490206SBarry Smith Scalar *aa; 229819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 22992593348eSBarry Smith 23001a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2301de6a44a3SBarry Smith bs2 = bs*bs; 2302b6490206SBarry Smith 23032593348eSBarry Smith MPI_Comm_size(comm,&size); 2304e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 230590ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2306*0752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2307e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 23082593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 23092593348eSBarry Smith 2310e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 231135aab85fSBarry Smith 231235aab85fSBarry Smith /* 231335aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 231435aab85fSBarry Smith divisible by the blocksize 231535aab85fSBarry Smith */ 2316b6490206SBarry Smith mbs = M/bs; 231735aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 231835aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 231935aab85fSBarry Smith else mbs++; 232035aab85fSBarry Smith if (extra_rows) { 2321537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 232235aab85fSBarry Smith } 2323b6490206SBarry Smith 23242593348eSBarry Smith /* read in row lengths */ 232535aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2326*0752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 232735aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 23282593348eSBarry Smith 2329b6490206SBarry Smith /* read in column indices */ 233035aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2331*0752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 233235aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2333b6490206SBarry Smith 2334b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2335b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2336b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 233735aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 233835aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 233935aab85fSBarry Smith masked = mask + mbs; 2340b6490206SBarry Smith rowcount = 0; nzcount = 0; 2341b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 234235aab85fSBarry Smith nmask = 0; 2343b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2344b6490206SBarry Smith kmax = rowlengths[rowcount]; 2345b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 234635aab85fSBarry Smith tmp = jj[nzcount++]/bs; 234735aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2348b6490206SBarry Smith } 2349b6490206SBarry Smith rowcount++; 2350b6490206SBarry Smith } 235135aab85fSBarry Smith browlengths[i] += nmask; 235235aab85fSBarry Smith /* zero out the mask elements we set */ 235335aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2354b6490206SBarry Smith } 2355b6490206SBarry Smith 23562593348eSBarry Smith /* create our matrix */ 23573eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 23582593348eSBarry Smith B = *A; 2359b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 23602593348eSBarry Smith 23612593348eSBarry Smith /* set matrix "i" values */ 2362de6a44a3SBarry Smith a->i[0] = 0; 2363b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2364b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2365b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 23662593348eSBarry Smith } 2367b6490206SBarry Smith a->nz = 0; 2368b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 23692593348eSBarry Smith 2370b6490206SBarry Smith /* read in nonzero values */ 237135aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2372*0752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 237335aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2374b6490206SBarry Smith 2375b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2376b6490206SBarry Smith nzcount = 0; jcount = 0; 2377b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2378b6490206SBarry Smith nzcountb = nzcount; 237935aab85fSBarry Smith nmask = 0; 2380b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2381b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2382b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 238335aab85fSBarry Smith tmp = jj[nzcount++]/bs; 238435aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2385b6490206SBarry Smith } 2386b6490206SBarry Smith rowcount++; 2387b6490206SBarry Smith } 2388de6a44a3SBarry Smith /* sort the masked values */ 238977c4ece6SBarry Smith PetscSortInt(nmask,masked); 2390de6a44a3SBarry Smith 2391b6490206SBarry Smith /* set "j" values into matrix */ 2392b6490206SBarry Smith maskcount = 1; 239335aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 239435aab85fSBarry Smith a->j[jcount++] = masked[j]; 2395de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2396b6490206SBarry Smith } 2397b6490206SBarry Smith /* set "a" values into matrix */ 2398de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2399b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2400b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2401b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2402de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2403de6a44a3SBarry Smith block = mask[tmp] - 1; 2404de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2405de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2406b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2407b6490206SBarry Smith } 2408b6490206SBarry Smith } 240935aab85fSBarry Smith /* zero out the mask elements we set */ 241035aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2411b6490206SBarry Smith } 2412e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2413b6490206SBarry Smith 2414b6490206SBarry Smith PetscFree(rowlengths); 2415b6490206SBarry Smith PetscFree(browlengths); 2416b6490206SBarry Smith PetscFree(aa); 2417b6490206SBarry Smith PetscFree(jj); 2418b6490206SBarry Smith PetscFree(mask); 2419b6490206SBarry Smith 2420b6490206SBarry Smith B->assembled = PETSC_TRUE; 2421de6a44a3SBarry Smith 2422de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2423de6a44a3SBarry Smith if (flg) { 242419bcc07fSBarry Smith Viewer tviewer; 242519bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2426639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 242719bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 242819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2429de6a44a3SBarry Smith } 2430de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2431de6a44a3SBarry Smith if (flg) { 243219bcc07fSBarry Smith Viewer tviewer; 243319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2434639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 243519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 243619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2437de6a44a3SBarry Smith } 2438de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2439de6a44a3SBarry Smith if (flg) { 244019bcc07fSBarry Smith Viewer tviewer; 244119bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 244219bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 244319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2444de6a44a3SBarry Smith } 2445de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2446de6a44a3SBarry Smith if (flg) { 244719bcc07fSBarry Smith Viewer tviewer; 244819bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2449639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 245019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 245119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2452de6a44a3SBarry Smith } 2453de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2454de6a44a3SBarry Smith if (flg) { 245519bcc07fSBarry Smith Viewer tviewer; 245619bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 245719bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 245819bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 245919bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2460de6a44a3SBarry Smith } 24612593348eSBarry Smith return 0; 24622593348eSBarry Smith } 24632593348eSBarry Smith 24642593348eSBarry Smith 24652593348eSBarry Smith 2466