1*9c01be13SBarry Smith 2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER 3*9c01be13SBarry Smith static char vcid[] = "$Id: baij.c,v 1.112 1997/09/26 02:19:29 bsmith Exp bsmith $"; 42593348eSBarry Smith #endif 52593348eSBarry Smith 62593348eSBarry Smith /* 7b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 82593348eSBarry Smith matrix storage format. 92593348eSBarry Smith */ 103369ce9aSBarry Smith 113369ce9aSBarry Smith #include "pinclude/pviewer.h" 123369ce9aSBarry Smith #include "sys.h" 1370f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 141a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 151a6a6d98SBarry Smith #include "src/inline/spops.h" 162593348eSBarry Smith #include "petsc.h" 173b547af2SSatish Balay 182593348eSBarry Smith 19de6a44a3SBarry Smith /* 20de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 21de6a44a3SBarry Smith */ 22de6a44a3SBarry Smith 235615d1e5SSatish Balay #undef __FUNC__ 245615d1e5SSatish Balay #define __FUNC__ "MatMarkDiag_SeqBAIJ" 25de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 26de6a44a3SBarry Smith { 27de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 287fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 29de6a44a3SBarry Smith 30de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 31de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 327fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 33de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 34de6a44a3SBarry Smith if (a->j[j] == i) { 35de6a44a3SBarry Smith diag[i] = j; 36de6a44a3SBarry Smith break; 37de6a44a3SBarry Smith } 38de6a44a3SBarry Smith } 39de6a44a3SBarry Smith } 40de6a44a3SBarry Smith a->diag = diag; 41de6a44a3SBarry Smith return 0; 42de6a44a3SBarry Smith } 432593348eSBarry Smith 442593348eSBarry Smith 453b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 463b2fbd54SBarry Smith 475615d1e5SSatish Balay #undef __FUNC__ 485615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 493b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 503b2fbd54SBarry Smith PetscTruth *done) 513b2fbd54SBarry Smith { 523b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 533b2fbd54SBarry Smith int ierr, n = a->mbs,i; 543b2fbd54SBarry Smith 553b2fbd54SBarry Smith *nn = n; 563b2fbd54SBarry Smith if (!ia) return 0; 573b2fbd54SBarry Smith if (symmetric) { 583b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 593b2fbd54SBarry Smith } else if (oshift == 1) { 603b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 613b2fbd54SBarry Smith int nz = a->i[n] + 1; 623b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 633b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 643b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 653b2fbd54SBarry Smith } else { 663b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 673b2fbd54SBarry Smith } 683b2fbd54SBarry Smith 693b2fbd54SBarry Smith return 0; 703b2fbd54SBarry Smith } 713b2fbd54SBarry Smith 725615d1e5SSatish Balay #undef __FUNC__ 73d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 743b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 753b2fbd54SBarry Smith PetscTruth *done) 763b2fbd54SBarry Smith { 773b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 783b2fbd54SBarry Smith int i,n = a->mbs; 793b2fbd54SBarry Smith 803b2fbd54SBarry Smith if (!ia) return 0; 813b2fbd54SBarry Smith if (symmetric) { 823b2fbd54SBarry Smith PetscFree(*ia); 833b2fbd54SBarry Smith PetscFree(*ja); 84af5da2bfSBarry Smith } else if (oshift == 1) { 853b2fbd54SBarry Smith int nz = a->i[n]; 863b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 873b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 883b2fbd54SBarry Smith } 893b2fbd54SBarry Smith return 0; 903b2fbd54SBarry Smith } 913b2fbd54SBarry Smith 923b2fbd54SBarry Smith 935615d1e5SSatish Balay #undef __FUNC__ 94d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Binary" 95b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 962593348eSBarry Smith { 97b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 989df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 99b6490206SBarry Smith Scalar *aa; 1002593348eSBarry Smith 10190ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1022593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 1032593348eSBarry Smith col_lens[0] = MAT_COOKIE; 1043b2fbd54SBarry Smith 1052593348eSBarry Smith col_lens[1] = a->m; 1062593348eSBarry Smith col_lens[2] = a->n; 1077e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 1082593348eSBarry Smith 1092593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 110b6490206SBarry Smith count = 0; 111b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 112b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 113b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 114b6490206SBarry Smith } 1152593348eSBarry Smith } 1160752156aSBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 1172593348eSBarry Smith PetscFree(col_lens); 1182593348eSBarry Smith 1192593348eSBarry Smith /* store column indices (zero start index) */ 12066963ce1SSatish Balay jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 121b6490206SBarry Smith count = 0; 122b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 123b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 124b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 125b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 126b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1272593348eSBarry Smith } 1282593348eSBarry Smith } 129b6490206SBarry Smith } 130b6490206SBarry Smith } 1310752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 132b6490206SBarry Smith PetscFree(jj); 1332593348eSBarry Smith 1342593348eSBarry Smith /* store nonzero values */ 13566963ce1SSatish Balay aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 136b6490206SBarry Smith count = 0; 137b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 138b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 139b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 140b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1417e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 142b6490206SBarry Smith } 143b6490206SBarry Smith } 144b6490206SBarry Smith } 145b6490206SBarry Smith } 1460752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 147b6490206SBarry Smith PetscFree(aa); 1482593348eSBarry Smith return 0; 1492593348eSBarry Smith } 1502593348eSBarry Smith 1515615d1e5SSatish Balay #undef __FUNC__ 152d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_ASCII" 153b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1542593348eSBarry Smith { 155b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1569df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1572593348eSBarry Smith FILE *fd; 1582593348eSBarry Smith char *outputname; 1592593348eSBarry Smith 16090ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1612593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 16290ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 163639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1647ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1652593348eSBarry Smith } 166639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 167e3372554SBarry Smith SETERRQ(1,0,"Matlab format not supported"); 1682593348eSBarry Smith } 169639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 17044cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 17144cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 17244cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 17344cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 17444cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 17544cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 176766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 17744cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 17844cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 179766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 180766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 181766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 18244cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 18344cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 18444cd7ae7SLois Curfman McInnes #else 18544cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 18644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 18744cd7ae7SLois Curfman McInnes #endif 18844cd7ae7SLois Curfman McInnes } 18944cd7ae7SLois Curfman McInnes } 19044cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 19144cd7ae7SLois Curfman McInnes } 19244cd7ae7SLois Curfman McInnes } 19344cd7ae7SLois Curfman McInnes } 1942593348eSBarry Smith else { 195b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 196b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 197b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 198b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 199b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 20088685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 201766eeae4SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 20288685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 2037e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 20488685aaeSLois Curfman McInnes } 205766eeae4SLois Curfman McInnes else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 206766eeae4SLois Curfman McInnes fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 207766eeae4SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 208766eeae4SLois Curfman McInnes } 20988685aaeSLois Curfman McInnes else { 2107e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 21188685aaeSLois Curfman McInnes } 21288685aaeSLois Curfman McInnes #else 2137e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 21488685aaeSLois Curfman McInnes #endif 2152593348eSBarry Smith } 2162593348eSBarry Smith } 2172593348eSBarry Smith fprintf(fd,"\n"); 2182593348eSBarry Smith } 2192593348eSBarry Smith } 220b6490206SBarry Smith } 2212593348eSBarry Smith fflush(fd); 2222593348eSBarry Smith return 0; 2232593348eSBarry Smith } 2242593348eSBarry Smith 2255615d1e5SSatish Balay #undef __FUNC__ 226d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ_Draw" 2273270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2283270192aSSatish Balay { 2293270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2303270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2313270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2323270192aSSatish Balay Scalar *aa; 2333270192aSSatish Balay Draw draw; 2343270192aSSatish Balay DrawButton button; 2353270192aSSatish Balay PetscTruth isnull; 2363270192aSSatish Balay 2373270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2383270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2393270192aSSatish Balay 2403270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2413270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2423270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2433270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2443270192aSSatish Balay color = DRAW_BLUE; 2453270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2463270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2473270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2483270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2493270192aSSatish Balay aa = a->a + j*bs2; 2503270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2513270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2523270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 253b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2543270192aSSatish Balay } 2553270192aSSatish Balay } 2563270192aSSatish Balay } 2573270192aSSatish Balay } 2583270192aSSatish Balay color = DRAW_CYAN; 2593270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2603270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2613270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2623270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2633270192aSSatish Balay aa = a->a + j*bs2; 2643270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2653270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2663270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 267b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2683270192aSSatish Balay } 2693270192aSSatish Balay } 2703270192aSSatish Balay } 2713270192aSSatish Balay } 2723270192aSSatish Balay 2733270192aSSatish Balay color = DRAW_RED; 2743270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2753270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2763270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2773270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2783270192aSSatish Balay aa = a->a + j*bs2; 2793270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2803270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2813270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 282b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 2833270192aSSatish Balay } 2843270192aSSatish Balay } 2853270192aSSatish Balay } 2863270192aSSatish Balay } 2873270192aSSatish Balay 2883270192aSSatish Balay DrawFlush(draw); 2893270192aSSatish Balay DrawGetPause(draw,&pause); 2903270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2913270192aSSatish Balay 2923270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2933270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2943270192aSSatish Balay while (button != BUTTON_RIGHT) { 2953270192aSSatish Balay DrawClear(draw); 2963270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2973270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2983270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2993270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 3003270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 3013270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 3023270192aSSatish Balay w *= scale; h *= scale; 3033270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 3043270192aSSatish Balay color = DRAW_BLUE; 3053270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3063270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3073270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3083270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3093270192aSSatish Balay aa = a->a + j*bs2; 3103270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3113270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3123270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 313b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3143270192aSSatish Balay } 3153270192aSSatish Balay } 3163270192aSSatish Balay } 3173270192aSSatish Balay } 3183270192aSSatish Balay color = DRAW_CYAN; 3193270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3203270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3213270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3223270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3233270192aSSatish Balay aa = a->a + j*bs2; 3243270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3253270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3263270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 327b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3283270192aSSatish Balay } 3293270192aSSatish Balay } 3303270192aSSatish Balay } 3313270192aSSatish Balay } 3323270192aSSatish Balay 3333270192aSSatish Balay color = DRAW_RED; 3343270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3353270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3363270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3373270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3383270192aSSatish Balay aa = a->a + j*bs2; 3393270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3403270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3413270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 342b34f160eSSatish Balay DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 3433270192aSSatish Balay } 3443270192aSSatish Balay } 3453270192aSSatish Balay } 3463270192aSSatish Balay } 3473270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3483270192aSSatish Balay } 3493270192aSSatish Balay return 0; 3503270192aSSatish Balay } 3513270192aSSatish Balay 3525615d1e5SSatish Balay #undef __FUNC__ 353d4bb536fSBarry Smith #define __FUNC__ "MatView_SeqBAIJ" 3548f6be9afSLois Curfman McInnes int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3552593348eSBarry Smith { 3562593348eSBarry Smith Mat A = (Mat) obj; 35719bcc07fSBarry Smith ViewerType vtype; 35819bcc07fSBarry Smith int ierr; 3592593348eSBarry Smith 36019bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36119bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 362e3372554SBarry Smith SETERRQ(1,0,"Matlab viewer not supported"); 3632593348eSBarry Smith } 36419bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 365b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3662593348eSBarry Smith } 36719bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 368b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3692593348eSBarry Smith } 37019bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3713270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3722593348eSBarry Smith } 3732593348eSBarry Smith return 0; 3742593348eSBarry Smith } 375b6490206SBarry Smith 376119a934aSSatish Balay #define CHUNKSIZE 10 377cd0e1443SSatish Balay 3785615d1e5SSatish Balay #undef __FUNC__ 3795615d1e5SSatish Balay #define __FUNC__ "MatSetValues_SeqBAIJ" 380639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 381cd0e1443SSatish Balay { 382cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 383cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 384cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 385a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3869df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 387cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 388cd0e1443SSatish Balay 389cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 390cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3913b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 392e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 393e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 3943b2fbd54SBarry Smith #endif 3952c3acbe9SBarry Smith rp = aj + ai[brow]; 3962c3acbe9SBarry Smith ap = aa + bs2*ai[brow]; 3972c3acbe9SBarry Smith rmax = imax[brow]; 3982c3acbe9SBarry Smith nrow = ailen[brow]; 399cd0e1443SSatish Balay low = 0; 400cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 4013b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 402e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 403e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 4043b2fbd54SBarry Smith #endif 405a7c10996SSatish Balay col = in[l]; bcol = col/bs; 406cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 407cd0e1443SSatish Balay if (roworiented) { 408cd0e1443SSatish Balay value = *v++; 409cd0e1443SSatish Balay } 410cd0e1443SSatish Balay else { 411cd0e1443SSatish Balay value = v[k + l*m]; 412cd0e1443SSatish Balay } 413cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 4142c3acbe9SBarry Smith while (high-low > 7) { 415cd0e1443SSatish Balay t = (low+high)/2; 416cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 417cd0e1443SSatish Balay else low = t; 418cd0e1443SSatish Balay } 419cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 420cd0e1443SSatish Balay if (rp[i] > bcol) break; 421cd0e1443SSatish Balay if (rp[i] == bcol) { 4227e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 423cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 424cd0e1443SSatish Balay else *bap = value; 425f1241b54SBarry Smith goto noinsert1; 426cd0e1443SSatish Balay } 427cd0e1443SSatish Balay } 428c2653b3dSLois Curfman McInnes if (nonew == 1) goto noinsert1; 42911ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 430cd0e1443SSatish Balay if (nrow >= rmax) { 431cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 432cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 433cd0e1443SSatish Balay Scalar *new_a; 434cd0e1443SSatish Balay 43511ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 43696854ed6SLois Curfman McInnes 43796854ed6SLois Curfman McInnes /* Malloc new storage space */ 4387e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 439cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4407e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 441cd0e1443SSatish Balay new_i = new_j + new_nz; 442cd0e1443SSatish Balay 443cd0e1443SSatish Balay /* copy over old data into new slots */ 444cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4457e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 446a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 447a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 448a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 449cd0e1443SSatish Balay len*sizeof(int)); 450a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 451a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 452a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 453a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 454cd0e1443SSatish Balay /* free up old matrix storage */ 455cd0e1443SSatish Balay PetscFree(a->a); 456cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 457cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 458cd0e1443SSatish Balay a->singlemalloc = 1; 459cd0e1443SSatish Balay 460a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 461cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4627e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 46318e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 464cd0e1443SSatish Balay a->reallocs++; 465119a934aSSatish Balay a->nz++; 466cd0e1443SSatish Balay } 4677e67e3f9SSatish Balay N = nrow++ - 1; 468cd0e1443SSatish Balay /* shift up all the later entries in this row */ 469cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 470cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4717e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 472cd0e1443SSatish Balay } 4737e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 474cd0e1443SSatish Balay rp[i] = bcol; 4757e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 476f1241b54SBarry Smith noinsert1:; 477cd0e1443SSatish Balay low = i; 478cd0e1443SSatish Balay } 479cd0e1443SSatish Balay ailen[brow] = nrow; 480cd0e1443SSatish Balay } 481cd0e1443SSatish Balay return 0; 482cd0e1443SSatish Balay } 483cd0e1443SSatish Balay 4845615d1e5SSatish Balay #undef __FUNC__ 48505a5f48eSSatish Balay #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 48692c4ed94SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 48792c4ed94SBarry Smith { 48892c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4898a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 490dd9472c6SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 491dd9472c6SBarry Smith int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 4920e324ae4SSatish Balay Scalar *ap,*value=v,*aa=a->a,*bap; 49392c4ed94SBarry Smith 4940e324ae4SSatish Balay if (roworiented) { 4950e324ae4SSatish Balay stepval = (n-1)*bs; 4960e324ae4SSatish Balay } else { 4970e324ae4SSatish Balay stepval = (m-1)*bs; 4980e324ae4SSatish Balay } 49992c4ed94SBarry Smith for ( k=0; k<m; k++ ) { /* loop over added rows */ 50092c4ed94SBarry Smith row = im[k]; 50192c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 50292c4ed94SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 5038a84c255SSatish Balay if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 50492c4ed94SBarry Smith #endif 50592c4ed94SBarry Smith rp = aj + ai[row]; 50692c4ed94SBarry Smith ap = aa + bs2*ai[row]; 50792c4ed94SBarry Smith rmax = imax[row]; 50892c4ed94SBarry Smith nrow = ailen[row]; 50992c4ed94SBarry Smith low = 0; 51092c4ed94SBarry Smith for ( l=0; l<n; l++ ) { /* loop over added columns */ 51192c4ed94SBarry Smith #if defined(PETSC_BOPT_g) 51292c4ed94SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 5138a84c255SSatish Balay if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 51492c4ed94SBarry Smith #endif 51592c4ed94SBarry Smith col = in[l]; 51692c4ed94SBarry Smith if (roworiented) { 5170e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 5180e324ae4SSatish Balay } else { 5190e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 52092c4ed94SBarry Smith } 52192c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 52292c4ed94SBarry Smith while (high-low > 7) { 52392c4ed94SBarry Smith t = (low+high)/2; 52492c4ed94SBarry Smith if (rp[t] > col) high = t; 52592c4ed94SBarry Smith else low = t; 52692c4ed94SBarry Smith } 52792c4ed94SBarry Smith for ( i=low; i<high; i++ ) { 52892c4ed94SBarry Smith if (rp[i] > col) break; 52992c4ed94SBarry Smith if (rp[i] == col) { 5308a84c255SSatish Balay bap = ap + bs2*i; 5310e324ae4SSatish Balay if (roworiented) { 5328a84c255SSatish Balay if (is == ADD_VALUES) { 533dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 534dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5358a84c255SSatish Balay bap[jj] += *value++; 536dd9472c6SBarry Smith } 537dd9472c6SBarry Smith } 5380e324ae4SSatish Balay } else { 539dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 540dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 5410e324ae4SSatish Balay bap[jj] = *value++; 5428a84c255SSatish Balay } 543dd9472c6SBarry Smith } 544dd9472c6SBarry Smith } 5450e324ae4SSatish Balay } else { 5460e324ae4SSatish Balay if (is == ADD_VALUES) { 547dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 548dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5490e324ae4SSatish Balay *bap++ += *value++; 550dd9472c6SBarry Smith } 551dd9472c6SBarry Smith } 5520e324ae4SSatish Balay } else { 553dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 554dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 5550e324ae4SSatish Balay *bap++ = *value++; 5560e324ae4SSatish Balay } 5578a84c255SSatish Balay } 558dd9472c6SBarry Smith } 559dd9472c6SBarry Smith } 560f1241b54SBarry Smith goto noinsert2; 56192c4ed94SBarry Smith } 56292c4ed94SBarry Smith } 56389280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 56411ebbc71SLois Curfman McInnes else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 56592c4ed94SBarry Smith if (nrow >= rmax) { 56692c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 56792c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 56892c4ed94SBarry Smith Scalar *new_a; 56992c4ed94SBarry Smith 57011ebbc71SLois Curfman McInnes if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 57189280ab3SLois Curfman McInnes 57292c4ed94SBarry Smith /* malloc new storage space */ 57392c4ed94SBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 57492c4ed94SBarry Smith new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 57592c4ed94SBarry Smith new_j = (int *) (new_a + bs2*new_nz); 57692c4ed94SBarry Smith new_i = new_j + new_nz; 57792c4ed94SBarry Smith 57892c4ed94SBarry Smith /* copy over old data into new slots */ 57992c4ed94SBarry Smith for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 58092c4ed94SBarry Smith for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 58192c4ed94SBarry Smith PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 58292c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 583dd9472c6SBarry Smith PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 58492c4ed94SBarry Smith PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 58592c4ed94SBarry Smith PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 586dd9472c6SBarry Smith PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 58792c4ed94SBarry Smith /* free up old matrix storage */ 58892c4ed94SBarry Smith PetscFree(a->a); 58992c4ed94SBarry Smith if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 59092c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 59192c4ed94SBarry Smith a->singlemalloc = 1; 59292c4ed94SBarry Smith 59392c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 59492c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 59592c4ed94SBarry Smith PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 59692c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 59792c4ed94SBarry Smith a->reallocs++; 59892c4ed94SBarry Smith a->nz++; 59992c4ed94SBarry Smith } 60092c4ed94SBarry Smith N = nrow++ - 1; 60192c4ed94SBarry Smith /* shift up all the later entries in this row */ 60292c4ed94SBarry Smith for ( ii=N; ii>=i; ii-- ) { 60392c4ed94SBarry Smith rp[ii+1] = rp[ii]; 60492c4ed94SBarry Smith PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 60592c4ed94SBarry Smith } 60692c4ed94SBarry Smith if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 60792c4ed94SBarry Smith rp[i] = col; 6088a84c255SSatish Balay bap = ap + bs2*i; 6090e324ae4SSatish Balay if (roworiented) { 610dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval) { 611dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs ) { 6120e324ae4SSatish Balay bap[jj] = *value++; 613dd9472c6SBarry Smith } 614dd9472c6SBarry Smith } 6150e324ae4SSatish Balay } else { 616dd9472c6SBarry Smith for ( ii=0; ii<bs; ii++,value+=stepval ) { 617dd9472c6SBarry Smith for (jj=0; jj<bs; jj++ ) { 6180e324ae4SSatish Balay *bap++ = *value++; 6190e324ae4SSatish Balay } 620dd9472c6SBarry Smith } 621dd9472c6SBarry Smith } 622f1241b54SBarry Smith noinsert2:; 62392c4ed94SBarry Smith low = i; 62492c4ed94SBarry Smith } 62592c4ed94SBarry Smith ailen[row] = nrow; 62692c4ed94SBarry Smith } 62792c4ed94SBarry Smith return 0; 62892c4ed94SBarry Smith } 62992c4ed94SBarry Smith 63092c4ed94SBarry Smith #undef __FUNC__ 631d4bb536fSBarry Smith #define __FUNC__ "MatGetSize_SeqBAIJ" 6328f6be9afSLois Curfman McInnes int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 6330b824a48SSatish Balay { 6340b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6350752156aSBarry Smith if (m) *m = a->m; 6360752156aSBarry Smith if (n) *n = a->n; 6370b824a48SSatish Balay return 0; 6380b824a48SSatish Balay } 6390b824a48SSatish Balay 6405615d1e5SSatish Balay #undef __FUNC__ 641d4bb536fSBarry Smith #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 6428f6be9afSLois Curfman McInnes int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 6430b824a48SSatish Balay { 6440b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6450b824a48SSatish Balay *m = 0; *n = a->m; 6460b824a48SSatish Balay return 0; 6470b824a48SSatish Balay } 6480b824a48SSatish Balay 6495615d1e5SSatish Balay #undef __FUNC__ 6505615d1e5SSatish Balay #define __FUNC__ "MatGetRow_SeqBAIJ" 6519867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6529867e207SSatish Balay { 6539867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6547e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 6559867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 6569867e207SSatish Balay 6579867e207SSatish Balay bs = a->bs; 6589867e207SSatish Balay ai = a->i; 6599867e207SSatish Balay aj = a->j; 6609867e207SSatish Balay aa = a->a; 6619df24120SSatish Balay bs2 = a->bs2; 6629867e207SSatish Balay 663e3372554SBarry Smith if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 6649867e207SSatish Balay 6659867e207SSatish Balay bn = row/bs; /* Block number */ 6669867e207SSatish Balay bp = row % bs; /* Block Position */ 6679867e207SSatish Balay M = ai[bn+1] - ai[bn]; 6689867e207SSatish Balay *nz = bs*M; 6699867e207SSatish Balay 6709867e207SSatish Balay if (v) { 6719867e207SSatish Balay *v = 0; 6729867e207SSatish Balay if (*nz) { 6739867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 6749867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6759867e207SSatish Balay v_i = *v + i*bs; 6767e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 6777e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 6789867e207SSatish Balay } 6799867e207SSatish Balay } 6809867e207SSatish Balay } 6819867e207SSatish Balay 6829867e207SSatish Balay if (idx) { 6839867e207SSatish Balay *idx = 0; 6849867e207SSatish Balay if (*nz) { 6859867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 6869867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 6879867e207SSatish Balay idx_i = *idx + i*bs; 6889867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 6899867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 6909867e207SSatish Balay } 6919867e207SSatish Balay } 6929867e207SSatish Balay } 6939867e207SSatish Balay return 0; 6949867e207SSatish Balay } 6959867e207SSatish Balay 6965615d1e5SSatish Balay #undef __FUNC__ 697d4bb536fSBarry Smith #define __FUNC__ "MatRestoreRow_SeqBAIJ" 6989867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 6999867e207SSatish Balay { 7009867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 7019867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 7029867e207SSatish Balay return 0; 7039867e207SSatish Balay } 704b6490206SBarry Smith 7055615d1e5SSatish Balay #undef __FUNC__ 7065615d1e5SSatish Balay #define __FUNC__ "MatTranspose_SeqBAIJ" 7078f6be9afSLois Curfman McInnes int MatTranspose_SeqBAIJ(Mat A,Mat *B) 708f2501298SSatish Balay { 709f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 710f2501298SSatish Balay Mat C; 711f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 7129df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 713f2501298SSatish Balay Scalar *array=a->a; 714f2501298SSatish Balay 715f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 716e3372554SBarry Smith SETERRQ(1,0,"Square matrix only for in-place"); 717f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 718f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 719a7c10996SSatish Balay 720a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 721f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 722f2501298SSatish Balay PetscFree(col); 723f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 724f2501298SSatish Balay cols = rows + bs; 725f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 726f2501298SSatish Balay cols[0] = i*bs; 727f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 728f2501298SSatish Balay len = ai[i+1] - ai[i]; 729f2501298SSatish Balay for ( j=0; j<len; j++ ) { 730f2501298SSatish Balay rows[0] = (*aj++)*bs; 731f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 732f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 733f2501298SSatish Balay array += bs2; 734f2501298SSatish Balay } 735f2501298SSatish Balay } 7361073c447SSatish Balay PetscFree(rows); 737f2501298SSatish Balay 7386d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 7396d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 740f2501298SSatish Balay 741f2501298SSatish Balay if (B != PETSC_NULL) { 742f2501298SSatish Balay *B = C; 743f2501298SSatish Balay } else { 744f2501298SSatish Balay /* This isn't really an in-place transpose */ 745f2501298SSatish Balay PetscFree(a->a); 746f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 747f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 748f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 749f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 750f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 751f2501298SSatish Balay PetscFree(a); 752f09e8eb9SSatish Balay PetscMemcpy(A,C,sizeof(struct _p_Mat)); 753f2501298SSatish Balay PetscHeaderDestroy(C); 754f2501298SSatish Balay } 755f2501298SSatish Balay return 0; 756f2501298SSatish Balay } 757f2501298SSatish Balay 758f2501298SSatish Balay 7595615d1e5SSatish Balay #undef __FUNC__ 7605615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 7618f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 762584200bdSSatish Balay { 763584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 764584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 765a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 76643ee02c3SBarry Smith int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 767584200bdSSatish Balay Scalar *aa = a->a, *ap; 768584200bdSSatish Balay 7696d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 770584200bdSSatish Balay 77143ee02c3SBarry Smith if (m) rmax = ailen[0]; 772584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 773584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 774584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 775d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 776584200bdSSatish Balay if (fshift) { 777a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 778584200bdSSatish Balay N = ailen[i]; 779584200bdSSatish Balay for ( j=0; j<N; j++ ) { 780584200bdSSatish Balay ip[j-fshift] = ip[j]; 7817e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 782584200bdSSatish Balay } 783584200bdSSatish Balay } 784584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 785584200bdSSatish Balay } 786584200bdSSatish Balay if (mbs) { 787584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 788584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 789584200bdSSatish Balay } 790584200bdSSatish Balay /* reset ilen and imax for each row */ 791584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 792584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 793584200bdSSatish Balay } 794a7c10996SSatish Balay a->nz = ai[mbs]; 795584200bdSSatish Balay 796584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 797584200bdSSatish Balay if (fshift && a->diag) { 798584200bdSSatish Balay PetscFree(a->diag); 799584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 800584200bdSSatish Balay a->diag = 0; 801584200bdSSatish Balay } 8023d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 803219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 8043d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 805584200bdSSatish Balay a->reallocs); 806d402145bSBarry Smith PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 807e2f3b5e9SSatish Balay a->reallocs = 0; 8084e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 8094e220ebcSLois Curfman McInnes 810584200bdSSatish Balay return 0; 811584200bdSSatish Balay } 812584200bdSSatish Balay 8135615d1e5SSatish Balay #undef __FUNC__ 8145615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_SeqBAIJ" 8158f6be9afSLois Curfman McInnes int MatZeroEntries_SeqBAIJ(Mat A) 8162593348eSBarry Smith { 817b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8189df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 8192593348eSBarry Smith return 0; 8202593348eSBarry Smith } 8212593348eSBarry Smith 8225615d1e5SSatish Balay #undef __FUNC__ 823d4bb536fSBarry Smith #define __FUNC__ "MatDestroy_SeqBAIJ" 824b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 8252593348eSBarry Smith { 8262593348eSBarry Smith Mat A = (Mat) obj; 827b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8282593348eSBarry Smith 8292593348eSBarry Smith #if defined(PETSC_LOG) 8302593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 8312593348eSBarry Smith #endif 8322593348eSBarry Smith PetscFree(a->a); 8332593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 8342593348eSBarry Smith if (a->diag) PetscFree(a->diag); 8352593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 8362593348eSBarry Smith if (a->imax) PetscFree(a->imax); 8372593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 838de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 8392593348eSBarry Smith PetscFree(a); 840f2655603SLois Curfman McInnes PLogObjectDestroy(A); 841f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 8422593348eSBarry Smith return 0; 8432593348eSBarry Smith } 8442593348eSBarry Smith 8455615d1e5SSatish Balay #undef __FUNC__ 846d4bb536fSBarry Smith #define __FUNC__ "MatSetOption_SeqBAIJ" 8478f6be9afSLois Curfman McInnes int MatSetOption_SeqBAIJ(Mat A,MatOption op) 8482593348eSBarry Smith { 849b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 8506d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 8516d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 8526d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 853219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 8546d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 855c2653b3dSLois Curfman McInnes else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 85696854ed6SLois Curfman McInnes else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 8576d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 8586d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 859219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 8606d4a8577SBarry Smith op == MAT_SYMMETRIC || 8616d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 86290f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 8632b362799SSatish Balay op == MAT_IGNORE_OFF_PROC_ENTRIES) 86494a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 8656d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 866e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 8672593348eSBarry Smith else 868e3372554SBarry Smith {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 8692593348eSBarry Smith return 0; 8702593348eSBarry Smith } 8712593348eSBarry Smith 8722593348eSBarry Smith 8732593348eSBarry Smith /* -------------------------------------------------------*/ 8742593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 8752593348eSBarry Smith /* -------------------------------------------------------*/ 876b6490206SBarry Smith #include "pinclude/plapack.h" 877b6490206SBarry Smith 8785615d1e5SSatish Balay #undef __FUNC__ 8795615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_1" 88039b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 8812593348eSBarry Smith { 882b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 88339b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 884c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 8852593348eSBarry Smith 886c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 887c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 888b6490206SBarry Smith 889119a934aSSatish Balay idx = a->j; 890119a934aSSatish Balay v = a->a; 891119a934aSSatish Balay ii = a->i; 892119a934aSSatish Balay 893119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 894119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 895119a934aSSatish Balay sum = 0.0; 896119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 8971a6a6d98SBarry Smith z[i] = sum; 898119a934aSSatish Balay } 899c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 900c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 90139b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 90239b95ed1SBarry Smith return 0; 90339b95ed1SBarry Smith } 90439b95ed1SBarry Smith 9055615d1e5SSatish Balay #undef __FUNC__ 9065615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_2" 90739b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 90839b95ed1SBarry Smith { 90939b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 91039b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 91139b95ed1SBarry Smith register Scalar x1,x2; 91239b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 913c16cb8f2SBarry Smith int j,n; 91439b95ed1SBarry Smith 915c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 916c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 91739b95ed1SBarry Smith 91839b95ed1SBarry Smith idx = a->j; 91939b95ed1SBarry Smith v = a->a; 92039b95ed1SBarry Smith ii = a->i; 92139b95ed1SBarry Smith 922119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 923119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 924119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 925119a934aSSatish Balay for ( j=0; j<n; j++ ) { 926119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 927119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 928119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 929119a934aSSatish Balay v += 4; 930119a934aSSatish Balay } 9311a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 932119a934aSSatish Balay z += 2; 933119a934aSSatish Balay } 934c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 935c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 93639b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 93739b95ed1SBarry Smith return 0; 93839b95ed1SBarry Smith } 93939b95ed1SBarry Smith 9405615d1e5SSatish Balay #undef __FUNC__ 9415615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_3" 94239b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 94339b95ed1SBarry Smith { 94439b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 94539b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 946c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 94739b95ed1SBarry Smith 948c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 949c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 95039b95ed1SBarry Smith 95139b95ed1SBarry Smith idx = a->j; 95239b95ed1SBarry Smith v = a->a; 95339b95ed1SBarry Smith ii = a->i; 95439b95ed1SBarry Smith 955119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 956119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 957119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 958119a934aSSatish Balay for ( j=0; j<n; j++ ) { 959119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 960119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 961119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 962119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 963119a934aSSatish Balay v += 9; 964119a934aSSatish Balay } 9651a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 966119a934aSSatish Balay z += 3; 967119a934aSSatish Balay } 968c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 969c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 97039b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 97139b95ed1SBarry Smith return 0; 97239b95ed1SBarry Smith } 97339b95ed1SBarry Smith 9745615d1e5SSatish Balay #undef __FUNC__ 9755615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_4" 97639b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 97739b95ed1SBarry Smith { 97839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 98039b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 98139b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 982c16cb8f2SBarry Smith int j,n; 98339b95ed1SBarry Smith 984c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 985c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 98639b95ed1SBarry Smith 98739b95ed1SBarry Smith idx = a->j; 98839b95ed1SBarry Smith v = a->a; 98939b95ed1SBarry Smith ii = a->i; 99039b95ed1SBarry Smith 991119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 992119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 993119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 994119a934aSSatish Balay for ( j=0; j<n; j++ ) { 995119a934aSSatish Balay xb = x + 4*(*idx++); 996119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 997119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 998119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 999119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1000119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1001119a934aSSatish Balay v += 16; 1002119a934aSSatish Balay } 10031a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1004119a934aSSatish Balay z += 4; 1005119a934aSSatish Balay } 1006c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1007c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 100839b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 100939b95ed1SBarry Smith return 0; 101039b95ed1SBarry Smith } 101139b95ed1SBarry Smith 10125615d1e5SSatish Balay #undef __FUNC__ 10135615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_5" 101439b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 101539b95ed1SBarry Smith { 101639b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101739b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 101839b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 1019c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 102039b95ed1SBarry Smith 1021c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1022c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 102339b95ed1SBarry Smith 102439b95ed1SBarry Smith idx = a->j; 102539b95ed1SBarry Smith v = a->a; 102639b95ed1SBarry Smith ii = a->i; 102739b95ed1SBarry Smith 1028119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1029119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1030119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1031119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1032119a934aSSatish Balay xb = x + 5*(*idx++); 1033119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1034119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1035119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1036119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1037119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1038119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1039119a934aSSatish Balay v += 25; 1040119a934aSSatish Balay } 10411a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1042119a934aSSatish Balay z += 5; 1043119a934aSSatish Balay } 1044c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1045c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 104639b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 104739b95ed1SBarry Smith return 0; 104839b95ed1SBarry Smith } 104939b95ed1SBarry Smith 10505615d1e5SSatish Balay #undef __FUNC__ 105148e9ddb2SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_7" 105248e9ddb2SSatish Balay static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 105348e9ddb2SSatish Balay { 105448e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 105548e9ddb2SSatish Balay register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 105648e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 105748e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 105848e9ddb2SSatish Balay 105948e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 106048e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 106148e9ddb2SSatish Balay 106248e9ddb2SSatish Balay idx = a->j; 106348e9ddb2SSatish Balay v = a->a; 106448e9ddb2SSatish Balay ii = a->i; 106548e9ddb2SSatish Balay 106648e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 106748e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 106848e9ddb2SSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 106948e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 107048e9ddb2SSatish Balay xb = x + 7*(*idx++); 107148e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 107248e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 107348e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 107448e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 107548e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 107648e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 107748e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 107848e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 107948e9ddb2SSatish Balay v += 49; 108048e9ddb2SSatish Balay } 108148e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 108248e9ddb2SSatish Balay z += 7; 108348e9ddb2SSatish Balay } 108448e9ddb2SSatish Balay 108548e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 108648e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 108748e9ddb2SSatish Balay PLogFlops(98*a->nz - a->m); 108848e9ddb2SSatish Balay return 0; 108948e9ddb2SSatish Balay } 109048e9ddb2SSatish Balay 109148e9ddb2SSatish Balay #undef __FUNC__ 10925615d1e5SSatish Balay #define __FUNC__ "MatMult_SeqBAIJ_N" 109339b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 109439b95ed1SBarry Smith { 109539b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 109639b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 1097c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1098c16cb8f2SBarry Smith int _One = 1,ncols,k; 1099c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 110039b95ed1SBarry Smith 1101c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1102c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 110339b95ed1SBarry Smith 110439b95ed1SBarry Smith idx = a->j; 110539b95ed1SBarry Smith v = a->a; 110639b95ed1SBarry Smith ii = a->i; 110739b95ed1SBarry Smith 110839b95ed1SBarry Smith 1109119a934aSSatish Balay if (!a->mult_work) { 11103b547af2SSatish Balay k = PetscMax(a->m,a->n); 11112b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1112119a934aSSatish Balay } 1113119a934aSSatish Balay work = a->mult_work; 1114119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1115119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1116119a934aSSatish Balay ncols = n*bs; 1117119a934aSSatish Balay workt = work; 1118119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1119119a934aSSatish Balay xb = x + bs*(*idx++); 1120119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1121119a934aSSatish Balay workt += bs; 1122119a934aSSatish Balay } 11231a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1124119a934aSSatish Balay v += n*bs2; 1125119a934aSSatish Balay z += bs; 1126119a934aSSatish Balay } 1127c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1128c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 11291a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 1130bb42667fSSatish Balay return 0; 1131bb42667fSSatish Balay } 1132bb42667fSSatish Balay 11335615d1e5SSatish Balay #undef __FUNC__ 11345615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1135f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1136f44d3a6dSSatish Balay { 1137f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1138f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 1139c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 1140f44d3a6dSSatish Balay 1141c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1142c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1143c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1144f44d3a6dSSatish Balay 1145f44d3a6dSSatish Balay idx = a->j; 1146f44d3a6dSSatish Balay v = a->a; 1147f44d3a6dSSatish Balay ii = a->i; 1148f44d3a6dSSatish Balay 1149f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1150f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1151f44d3a6dSSatish Balay sum = y[i]; 1152f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 1153f44d3a6dSSatish Balay z[i] = sum; 1154f44d3a6dSSatish Balay } 1155c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1156c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1157c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1158f44d3a6dSSatish Balay PLogFlops(2*a->nz); 1159f44d3a6dSSatish Balay return 0; 1160f44d3a6dSSatish Balay } 1161f44d3a6dSSatish Balay 11625615d1e5SSatish Balay #undef __FUNC__ 11635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1164f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1165f44d3a6dSSatish Balay { 1166f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1167f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1168f44d3a6dSSatish Balay register Scalar x1,x2; 1169f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1170c16cb8f2SBarry Smith int j,n; 1171f44d3a6dSSatish Balay 1172c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1173c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1174c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1175f44d3a6dSSatish Balay 1176f44d3a6dSSatish Balay idx = a->j; 1177f44d3a6dSSatish Balay v = a->a; 1178f44d3a6dSSatish Balay ii = a->i; 1179f44d3a6dSSatish Balay 1180f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1181f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1182f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 1183f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1184f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1185f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 1186f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 1187f44d3a6dSSatish Balay v += 4; 1188f44d3a6dSSatish Balay } 1189f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 1190f44d3a6dSSatish Balay z += 2; y += 2; 1191f44d3a6dSSatish Balay } 1192c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1193c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1194c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1195f44d3a6dSSatish Balay PLogFlops(4*a->nz); 1196f44d3a6dSSatish Balay return 0; 1197f44d3a6dSSatish Balay } 1198f44d3a6dSSatish Balay 11995615d1e5SSatish Balay #undef __FUNC__ 12005615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1201f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1202f44d3a6dSSatish Balay { 1203f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1204f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1205c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1206f44d3a6dSSatish Balay 1207c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1208c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1209c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1210f44d3a6dSSatish Balay 1211f44d3a6dSSatish Balay idx = a->j; 1212f44d3a6dSSatish Balay v = a->a; 1213f44d3a6dSSatish Balay ii = a->i; 1214f44d3a6dSSatish Balay 1215f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1216f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1217f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1218f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1219f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1220f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1221f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1222f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1223f44d3a6dSSatish Balay v += 9; 1224f44d3a6dSSatish Balay } 1225f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 1226f44d3a6dSSatish Balay z += 3; y += 3; 1227f44d3a6dSSatish Balay } 1228c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1229c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1230c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1231f44d3a6dSSatish Balay PLogFlops(18*a->nz); 1232f44d3a6dSSatish Balay return 0; 1233f44d3a6dSSatish Balay } 1234f44d3a6dSSatish Balay 12355615d1e5SSatish Balay #undef __FUNC__ 12365615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1237f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1238f44d3a6dSSatish Balay { 1239f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1240f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1241f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 1242f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 1243c16cb8f2SBarry Smith int j,n; 1244f44d3a6dSSatish Balay 1245c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1246c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1247c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1248f44d3a6dSSatish Balay 1249f44d3a6dSSatish Balay idx = a->j; 1250f44d3a6dSSatish Balay v = a->a; 1251f44d3a6dSSatish Balay ii = a->i; 1252f44d3a6dSSatish Balay 1253f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1254f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1255f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1256f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1257f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1258f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1259f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1260f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1261f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1262f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1263f44d3a6dSSatish Balay v += 16; 1264f44d3a6dSSatish Balay } 1265f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1266f44d3a6dSSatish Balay z += 4; y += 4; 1267f44d3a6dSSatish Balay } 1268c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1269c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1270c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1271f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1272f44d3a6dSSatish Balay return 0; 1273f44d3a6dSSatish Balay } 1274f44d3a6dSSatish Balay 12755615d1e5SSatish Balay #undef __FUNC__ 12765615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1277f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1278f44d3a6dSSatish Balay { 1279f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1280f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1281f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1282c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1283f44d3a6dSSatish Balay 1284c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1285c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1286c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1287f44d3a6dSSatish Balay 1288f44d3a6dSSatish Balay idx = a->j; 1289f44d3a6dSSatish Balay v = a->a; 1290f44d3a6dSSatish Balay ii = a->i; 1291f44d3a6dSSatish Balay 1292f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1293f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1294f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1295f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1296f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1297f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1298f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1299f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1300f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1301f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1302f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1303f44d3a6dSSatish Balay v += 25; 1304f44d3a6dSSatish Balay } 1305f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1306f44d3a6dSSatish Balay z += 5; y += 5; 1307f44d3a6dSSatish Balay } 1308c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1309c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1310c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1311f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1312f44d3a6dSSatish Balay return 0; 1313f44d3a6dSSatish Balay } 1314f44d3a6dSSatish Balay 13155615d1e5SSatish Balay #undef __FUNC__ 131648e9ddb2SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 131748e9ddb2SSatish Balay static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 131848e9ddb2SSatish Balay { 131948e9ddb2SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 132048e9ddb2SSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 132148e9ddb2SSatish Balay register Scalar x1,x2,x3,x4,x5,x6,x7; 132248e9ddb2SSatish Balay int mbs=a->mbs,i,*idx,*ii,j,n; 132348e9ddb2SSatish Balay 132448e9ddb2SSatish Balay VecGetArray_Fast(xx,x); 132548e9ddb2SSatish Balay VecGetArray_Fast(yy,y); 132648e9ddb2SSatish Balay VecGetArray_Fast(zz,z); 132748e9ddb2SSatish Balay 132848e9ddb2SSatish Balay idx = a->j; 132948e9ddb2SSatish Balay v = a->a; 133048e9ddb2SSatish Balay ii = a->i; 133148e9ddb2SSatish Balay 133248e9ddb2SSatish Balay for ( i=0; i<mbs; i++ ) { 133348e9ddb2SSatish Balay n = ii[1] - ii[0]; ii++; 133448e9ddb2SSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 133548e9ddb2SSatish Balay for ( j=0; j<n; j++ ) { 133648e9ddb2SSatish Balay xb = x + 7*(*idx++); 133748e9ddb2SSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 133848e9ddb2SSatish Balay sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 133948e9ddb2SSatish Balay sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 134048e9ddb2SSatish Balay sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 134148e9ddb2SSatish Balay sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 134248e9ddb2SSatish Balay sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 134348e9ddb2SSatish Balay sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 134448e9ddb2SSatish Balay sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 134548e9ddb2SSatish Balay v += 49; 134648e9ddb2SSatish Balay } 134748e9ddb2SSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 134848e9ddb2SSatish Balay z += 7; y += 7; 134948e9ddb2SSatish Balay } 135048e9ddb2SSatish Balay VecRestoreArray_Fast(xx,x); 135148e9ddb2SSatish Balay VecRestoreArray_Fast(yy,y); 135248e9ddb2SSatish Balay VecRestoreArray_Fast(zz,z); 135348e9ddb2SSatish Balay PLogFlops(98*a->nz); 135448e9ddb2SSatish Balay return 0; 135548e9ddb2SSatish Balay } 135648e9ddb2SSatish Balay 135748e9ddb2SSatish Balay 135848e9ddb2SSatish Balay #undef __FUNC__ 13595615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1360f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1361f44d3a6dSSatish Balay { 1362f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1363f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1364f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1365f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1366f44d3a6dSSatish Balay 1367f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1368f44d3a6dSSatish Balay 1369c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1370c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1371f44d3a6dSSatish Balay 1372f44d3a6dSSatish Balay idx = a->j; 1373f44d3a6dSSatish Balay v = a->a; 1374f44d3a6dSSatish Balay ii = a->i; 1375f44d3a6dSSatish Balay 1376f44d3a6dSSatish Balay 1377f44d3a6dSSatish Balay if (!a->mult_work) { 1378f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1379f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1380f44d3a6dSSatish Balay } 1381f44d3a6dSSatish Balay work = a->mult_work; 1382f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1383f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1384f44d3a6dSSatish Balay ncols = n*bs; 1385f44d3a6dSSatish Balay workt = work; 1386f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1387f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1388f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1389f44d3a6dSSatish Balay workt += bs; 1390f44d3a6dSSatish Balay } 1391f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1392f44d3a6dSSatish Balay v += n*bs2; 1393f44d3a6dSSatish Balay z += bs; 1394f44d3a6dSSatish Balay } 1395c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1396c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1397f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1398f44d3a6dSSatish Balay return 0; 1399f44d3a6dSSatish Balay } 1400f44d3a6dSSatish Balay 14015615d1e5SSatish Balay #undef __FUNC__ 14025615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_SeqBAIJ" 14038f6be9afSLois Curfman McInnes int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1404bb42667fSSatish Balay { 1405bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14061a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1407bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1408bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 14099df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1410119a934aSSatish Balay 1411119a934aSSatish Balay 141290f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 141390f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1414bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1415bb42667fSSatish Balay 1416119a934aSSatish Balay idx = a->j; 1417119a934aSSatish Balay v = a->a; 1418119a934aSSatish Balay ii = a->i; 1419119a934aSSatish Balay 1420119a934aSSatish Balay switch (bs) { 1421119a934aSSatish Balay case 1: 1422119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1423119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1424119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1425119a934aSSatish Balay ib = idx + ai[i]; 1426119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1427bb1453f0SSatish Balay rval = ib[j]; 1428bb1453f0SSatish Balay z[rval] += *v++ * x1; 1429119a934aSSatish Balay } 1430119a934aSSatish Balay } 1431119a934aSSatish Balay break; 1432119a934aSSatish Balay case 2: 1433119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1434119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1435119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1436119a934aSSatish Balay ib = idx + ai[i]; 1437119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1438119a934aSSatish Balay rval = ib[j]*2; 1439bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1440bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1441119a934aSSatish Balay v += 4; 1442119a934aSSatish Balay } 1443119a934aSSatish Balay } 1444119a934aSSatish Balay break; 1445119a934aSSatish Balay case 3: 1446119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1447119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1448119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1449119a934aSSatish Balay ib = idx + ai[i]; 1450119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1451119a934aSSatish Balay rval = ib[j]*3; 1452bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1453bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1454bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1455119a934aSSatish Balay v += 9; 1456119a934aSSatish Balay } 1457119a934aSSatish Balay } 1458119a934aSSatish Balay break; 1459119a934aSSatish Balay case 4: 1460119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1461119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1462119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1463119a934aSSatish Balay ib = idx + ai[i]; 1464119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1465119a934aSSatish Balay rval = ib[j]*4; 1466bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1467bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1468bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1469bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1470119a934aSSatish Balay v += 16; 1471119a934aSSatish Balay } 1472119a934aSSatish Balay } 1473119a934aSSatish Balay break; 1474119a934aSSatish Balay case 5: 1475119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1476119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1477119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1478119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1479119a934aSSatish Balay ib = idx + ai[i]; 1480119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1481119a934aSSatish Balay rval = ib[j]*5; 1482bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1483bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1484bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1485bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1486bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1487119a934aSSatish Balay v += 25; 1488119a934aSSatish Balay } 1489119a934aSSatish Balay } 1490119a934aSSatish Balay break; 1491119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1492119a934aSSatish Balay default: { 1493119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1494119a934aSSatish Balay if (!a->mult_work) { 14953b547af2SSatish Balay k = PetscMax(a->m,a->n); 1496bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1497119a934aSSatish Balay CHKPTRQ(a->mult_work); 1498119a934aSSatish Balay } 1499119a934aSSatish Balay work = a->mult_work; 1500119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1501119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1502119a934aSSatish Balay ncols = n*bs; 1503119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1504119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1505119a934aSSatish Balay v += n*bs2; 1506119a934aSSatish Balay x += bs; 1507119a934aSSatish Balay workt = work; 1508119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1509119a934aSSatish Balay zb = z + bs*(*idx++); 1510bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1511119a934aSSatish Balay workt += bs; 1512119a934aSSatish Balay } 1513119a934aSSatish Balay } 1514119a934aSSatish Balay } 1515119a934aSSatish Balay } 15160419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 15170419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1518faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1519faf6580fSSatish Balay return 0; 1520faf6580fSSatish Balay } 15211c351548SSatish Balay 15225615d1e5SSatish Balay #undef __FUNC__ 15235615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 15248f6be9afSLois Curfman McInnes int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1525faf6580fSSatish Balay { 1526faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1527faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1528faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1529faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1530faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1531faf6580fSSatish Balay 1532faf6580fSSatish Balay 1533faf6580fSSatish Balay 153490f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 153590f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1536faf6580fSSatish Balay 1537648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1538648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1539648c1d95SSatish Balay 1540faf6580fSSatish Balay 1541faf6580fSSatish Balay idx = a->j; 1542faf6580fSSatish Balay v = a->a; 1543faf6580fSSatish Balay ii = a->i; 1544faf6580fSSatish Balay 1545faf6580fSSatish Balay switch (bs) { 1546faf6580fSSatish Balay case 1: 1547faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1548faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1549faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1550faf6580fSSatish Balay ib = idx + ai[i]; 1551faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1552faf6580fSSatish Balay rval = ib[j]; 1553faf6580fSSatish Balay z[rval] += *v++ * x1; 1554faf6580fSSatish Balay } 1555faf6580fSSatish Balay } 1556faf6580fSSatish Balay break; 1557faf6580fSSatish Balay case 2: 1558faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1559faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1560faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1561faf6580fSSatish Balay ib = idx + ai[i]; 1562faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1563faf6580fSSatish Balay rval = ib[j]*2; 1564faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1565faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1566faf6580fSSatish Balay v += 4; 1567faf6580fSSatish Balay } 1568faf6580fSSatish Balay } 1569faf6580fSSatish Balay break; 1570faf6580fSSatish Balay case 3: 1571faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1572faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1573faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1574faf6580fSSatish Balay ib = idx + ai[i]; 1575faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1576faf6580fSSatish Balay rval = ib[j]*3; 1577faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1578faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1579faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1580faf6580fSSatish Balay v += 9; 1581faf6580fSSatish Balay } 1582faf6580fSSatish Balay } 1583faf6580fSSatish Balay break; 1584faf6580fSSatish Balay case 4: 1585faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1586faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1587faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1588faf6580fSSatish Balay ib = idx + ai[i]; 1589faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1590faf6580fSSatish Balay rval = ib[j]*4; 1591faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1592faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1593faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1594faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1595faf6580fSSatish Balay v += 16; 1596faf6580fSSatish Balay } 1597faf6580fSSatish Balay } 1598faf6580fSSatish Balay break; 1599faf6580fSSatish Balay case 5: 1600faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1601faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1602faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1603faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1604faf6580fSSatish Balay ib = idx + ai[i]; 1605faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1606faf6580fSSatish Balay rval = ib[j]*5; 1607faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1608faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1609faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1610faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1611faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1612faf6580fSSatish Balay v += 25; 1613faf6580fSSatish Balay } 1614faf6580fSSatish Balay } 1615faf6580fSSatish Balay break; 1616faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1617faf6580fSSatish Balay default: { 1618faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1619faf6580fSSatish Balay if (!a->mult_work) { 1620faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1621faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1622faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1623faf6580fSSatish Balay } 1624faf6580fSSatish Balay work = a->mult_work; 1625faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1626faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1627faf6580fSSatish Balay ncols = n*bs; 1628faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1629faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1630faf6580fSSatish Balay v += n*bs2; 1631faf6580fSSatish Balay x += bs; 1632faf6580fSSatish Balay workt = work; 1633faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1634faf6580fSSatish Balay zb = z + bs*(*idx++); 1635faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1636faf6580fSSatish Balay workt += bs; 1637faf6580fSSatish Balay } 1638faf6580fSSatish Balay } 1639faf6580fSSatish Balay } 1640faf6580fSSatish Balay } 1641faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1642faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1643faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 16442593348eSBarry Smith return 0; 16452593348eSBarry Smith } 16462593348eSBarry Smith 16475615d1e5SSatish Balay #undef __FUNC__ 1648d4bb536fSBarry Smith #define __FUNC__ "MatGetInfo_SeqBAIJ" 16498f6be9afSLois Curfman McInnes int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 16502593348eSBarry Smith { 1651b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 16524e220ebcSLois Curfman McInnes 16534e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 16544e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 16554e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 16564e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 16574e220ebcSLois Curfman McInnes info->block_size = a->bs2; 16584e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 16594e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 16604e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 16614e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 16624e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 16634e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 16644e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 16654e220ebcSLois Curfman McInnes info->memory = A->mem; 16664e220ebcSLois Curfman McInnes if (A->factor) { 16674e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 16684e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 16694e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 16704e220ebcSLois Curfman McInnes } else { 16714e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 16724e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 16734e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 16744e220ebcSLois Curfman McInnes } 16752593348eSBarry Smith return 0; 16762593348eSBarry Smith } 16772593348eSBarry Smith 16785615d1e5SSatish Balay #undef __FUNC__ 16795615d1e5SSatish Balay #define __FUNC__ "MatEqual_SeqBAIJ" 16808f6be9afSLois Curfman McInnes int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 168191d316f6SSatish Balay { 168291d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 168391d316f6SSatish Balay 1684e3372554SBarry Smith if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 168591d316f6SSatish Balay 168691d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 168791d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1688a7c10996SSatish Balay (a->nz != b->nz)) { 168991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169091d316f6SSatish Balay } 169191d316f6SSatish Balay 169291d316f6SSatish Balay /* if the a->i are the same */ 169391d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 169491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 169591d316f6SSatish Balay } 169691d316f6SSatish Balay 169791d316f6SSatish Balay /* if a->j are the same */ 169891d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 169991d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 170091d316f6SSatish Balay } 170191d316f6SSatish Balay 170291d316f6SSatish Balay /* if a->a are the same */ 170391d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 170491d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 170591d316f6SSatish Balay } 170691d316f6SSatish Balay *flg = PETSC_TRUE; 170791d316f6SSatish Balay return 0; 170891d316f6SSatish Balay 170991d316f6SSatish Balay } 171091d316f6SSatish Balay 17115615d1e5SSatish Balay #undef __FUNC__ 17125615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 17138f6be9afSLois Curfman McInnes int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 171491d316f6SSatish Balay { 171591d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17167e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 171717e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 171817e48fc4SSatish Balay 17190513a670SBarry Smith if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 172017e48fc4SSatish Balay bs = a->bs; 172117e48fc4SSatish Balay aa = a->a; 172217e48fc4SSatish Balay ai = a->i; 172317e48fc4SSatish Balay aj = a->j; 172417e48fc4SSatish Balay ambs = a->mbs; 17259df24120SSatish Balay bs2 = a->bs2; 172691d316f6SSatish Balay 172791d316f6SSatish Balay VecSet(&zero,v); 172890f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1729e3372554SBarry Smith if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 173017e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 173117e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 173217e48fc4SSatish Balay if (aj[j] == i) { 173317e48fc4SSatish Balay row = i*bs; 17347e67e3f9SSatish Balay aa_j = aa+j*bs2; 17357e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 173691d316f6SSatish Balay break; 173791d316f6SSatish Balay } 173891d316f6SSatish Balay } 173991d316f6SSatish Balay } 174091d316f6SSatish Balay return 0; 174191d316f6SSatish Balay } 174291d316f6SSatish Balay 17435615d1e5SSatish Balay #undef __FUNC__ 17445615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 17458f6be9afSLois Curfman McInnes int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 17469867e207SSatish Balay { 17479867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 17489867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 17497e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 17509867e207SSatish Balay 17519867e207SSatish Balay ai = a->i; 17529867e207SSatish Balay aj = a->j; 17539867e207SSatish Balay aa = a->a; 17549867e207SSatish Balay m = a->m; 17559867e207SSatish Balay n = a->n; 17569867e207SSatish Balay bs = a->bs; 17579867e207SSatish Balay mbs = a->mbs; 17589df24120SSatish Balay bs2 = a->bs2; 17599867e207SSatish Balay if (ll) { 176090f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1761e3372554SBarry Smith if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 17629867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17639867e207SSatish Balay M = ai[i+1] - ai[i]; 17649867e207SSatish Balay li = l + i*bs; 17657e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17669867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17677e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 17689867e207SSatish Balay (*v++) *= li[k%bs]; 17699867e207SSatish Balay } 17709867e207SSatish Balay } 17719867e207SSatish Balay } 17729867e207SSatish Balay } 17739867e207SSatish Balay 17749867e207SSatish Balay if (rr) { 177590f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1776e3372554SBarry Smith if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 17779867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 17789867e207SSatish Balay M = ai[i+1] - ai[i]; 17797e67e3f9SSatish Balay v = aa + bs2*ai[i]; 17809867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 17819867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 17829867e207SSatish Balay for ( k=0; k<bs; k++ ) { 17839867e207SSatish Balay x = ri[k]; 17849867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 17859867e207SSatish Balay } 17869867e207SSatish Balay } 17879867e207SSatish Balay } 17889867e207SSatish Balay } 17899867e207SSatish Balay return 0; 17909867e207SSatish Balay } 17919867e207SSatish Balay 17929867e207SSatish Balay 1793b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1794b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 17952a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1796736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1797736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 17981a6a6d98SBarry Smith 17991a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 18001a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 18011a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 18021a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 18031a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 18041a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 180548e9ddb2SSatish Balay extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 18061a6a6d98SBarry Smith 18077fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 18087fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 18097fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 18107fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 18117fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 18127fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 18132593348eSBarry Smith 18145615d1e5SSatish Balay #undef __FUNC__ 18155615d1e5SSatish Balay #define __FUNC__ "MatNorm_SeqBAIJ" 18168f6be9afSLois Curfman McInnes int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 18172593348eSBarry Smith { 1818b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18192593348eSBarry Smith Scalar *v = a->a; 18202593348eSBarry Smith double sum = 0.0; 18219df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 18222593348eSBarry Smith 18232593348eSBarry Smith if (type == NORM_FROBENIUS) { 18240419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 18252593348eSBarry Smith #if defined(PETSC_COMPLEX) 18262593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 18272593348eSBarry Smith #else 18282593348eSBarry Smith sum += (*v)*(*v); v++; 18292593348eSBarry Smith #endif 18302593348eSBarry Smith } 18312593348eSBarry Smith *norm = sqrt(sum); 18322593348eSBarry Smith } 18332593348eSBarry Smith else { 1834e3372554SBarry Smith SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 18352593348eSBarry Smith } 18362593348eSBarry Smith return 0; 18372593348eSBarry Smith } 18382593348eSBarry Smith 18393eee16b0SBarry Smith extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 18402593348eSBarry Smith /* 18412593348eSBarry Smith note: This can only work for identity for row and col. It would 18422593348eSBarry Smith be good to check this and otherwise generate an error. 18432593348eSBarry Smith */ 18445615d1e5SSatish Balay #undef __FUNC__ 18455615d1e5SSatish Balay #define __FUNC__ "MatILUFactor_SeqBAIJ" 18468f6be9afSLois Curfman McInnes int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 18472593348eSBarry Smith { 1848b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18492593348eSBarry Smith Mat outA; 1850de6a44a3SBarry Smith int ierr; 18512593348eSBarry Smith 1852e3372554SBarry Smith if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 18532593348eSBarry Smith 18542593348eSBarry Smith outA = inA; 18552593348eSBarry Smith inA->factor = FACTOR_LU; 18562593348eSBarry Smith a->row = row; 18572593348eSBarry Smith a->col = col; 18582593348eSBarry Smith 1859eed86810SBarry Smith if (!a->solve_work) { 1860de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1861eed86810SBarry Smith PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1862eed86810SBarry Smith } 18632593348eSBarry Smith 18642593348eSBarry Smith if (!a->diag) { 1865b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 18662593348eSBarry Smith } 18677fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 18683eee16b0SBarry Smith 18693eee16b0SBarry Smith /* 18703eee16b0SBarry Smith Blocksize 4 has a special faster solver for ILU(0) factorization 18713eee16b0SBarry Smith with natural ordering 18723eee16b0SBarry Smith */ 18733eee16b0SBarry Smith if (a->bs == 4) { 18743eee16b0SBarry Smith inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 18753eee16b0SBarry Smith } 18763eee16b0SBarry Smith 18772593348eSBarry Smith return 0; 18782593348eSBarry Smith } 18792593348eSBarry Smith 18805615d1e5SSatish Balay #undef __FUNC__ 18815615d1e5SSatish Balay #define __FUNC__ "MatScale_SeqBAIJ" 18828f6be9afSLois Curfman McInnes int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 18832593348eSBarry Smith { 1884b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 18859df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1886b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1887b6490206SBarry Smith PLogFlops(totalnz); 18882593348eSBarry Smith return 0; 18892593348eSBarry Smith } 18902593348eSBarry Smith 18915615d1e5SSatish Balay #undef __FUNC__ 18925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_SeqBAIJ" 18938f6be9afSLois Curfman McInnes int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 18947e67e3f9SSatish Balay { 18957e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 18967e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1897a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 18989df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 18997e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 19007e67e3f9SSatish Balay 19017e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 19027e67e3f9SSatish Balay row = im[k]; brow = row/bs; 1903e3372554SBarry Smith if (row < 0) SETERRQ(1,0,"Negative row"); 1904e3372554SBarry Smith if (row >= a->m) SETERRQ(1,0,"Row too large"); 1905a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 19067e67e3f9SSatish Balay nrow = ailen[brow]; 19077e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 1908e3372554SBarry Smith if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1909e3372554SBarry Smith if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1910a7c10996SSatish Balay col = in[l] ; 19117e67e3f9SSatish Balay bcol = col/bs; 19127e67e3f9SSatish Balay cidx = col%bs; 19137e67e3f9SSatish Balay ridx = row%bs; 19147e67e3f9SSatish Balay high = nrow; 19157e67e3f9SSatish Balay low = 0; /* assume unsorted */ 19167e67e3f9SSatish Balay while (high-low > 5) { 19177e67e3f9SSatish Balay t = (low+high)/2; 19187e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 19197e67e3f9SSatish Balay else low = t; 19207e67e3f9SSatish Balay } 19217e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 19227e67e3f9SSatish Balay if (rp[i] > bcol) break; 19237e67e3f9SSatish Balay if (rp[i] == bcol) { 19247e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 19257e67e3f9SSatish Balay goto finished; 19267e67e3f9SSatish Balay } 19277e67e3f9SSatish Balay } 19287e67e3f9SSatish Balay *v++ = zero; 19297e67e3f9SSatish Balay finished:; 19307e67e3f9SSatish Balay } 19317e67e3f9SSatish Balay } 19327e67e3f9SSatish Balay return 0; 19337e67e3f9SSatish Balay } 19347e67e3f9SSatish Balay 19355615d1e5SSatish Balay #undef __FUNC__ 1936d4bb536fSBarry Smith #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 19378f6be9afSLois Curfman McInnes int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 19385a838052SSatish Balay { 19395a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 19405a838052SSatish Balay *bs = baij->bs; 19415a838052SSatish Balay return 0; 19425a838052SSatish Balay } 19435a838052SSatish Balay 1944d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 19455615d1e5SSatish Balay #undef __FUNC__ 19465615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1947d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1948d9b7c43dSSatish Balay { 1949d9b7c43dSSatish Balay int i,row; 1950d9b7c43dSSatish Balay row = idx[0]; 1951d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1952d9b7c43dSSatish Balay 1953d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1954d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1955d9b7c43dSSatish Balay } 1956d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1957d9b7c43dSSatish Balay return 0; 1958d9b7c43dSSatish Balay } 1959d9b7c43dSSatish Balay 19605615d1e5SSatish Balay #undef __FUNC__ 19615615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_SeqBAIJ" 19628f6be9afSLois Curfman McInnes int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1963d9b7c43dSSatish Balay { 1964d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1965d9b7c43dSSatish Balay IS is_local; 1966d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1967d9b7c43dSSatish Balay PetscTruth flg; 1968d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1969d9b7c43dSSatish Balay 1970d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1971d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1972d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1973537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1974d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1975d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1976d9b7c43dSSatish Balay 1977d9b7c43dSSatish Balay i = 0; 1978d9b7c43dSSatish Balay while (i < is_n) { 1979e3372554SBarry Smith if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1980d9b7c43dSSatish Balay flg = PETSC_FALSE; 1981d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1982d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1983d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1984d9b7c43dSSatish Balay i += bs; 1985d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1986d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1987d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1988d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1989d9b7c43dSSatish Balay aa[0] = zero; 1990d9b7c43dSSatish Balay aa+=bs; 1991d9b7c43dSSatish Balay } 1992d9b7c43dSSatish Balay i++; 1993d9b7c43dSSatish Balay } 1994d9b7c43dSSatish Balay } 1995d9b7c43dSSatish Balay if (diag) { 1996d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1997d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1998d9b7c43dSSatish Balay } 1999d9b7c43dSSatish Balay } 2000d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2001d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2002d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 20039a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2004d9b7c43dSSatish Balay 2005d9b7c43dSSatish Balay return 0; 2006d9b7c43dSSatish Balay } 20071c351548SSatish Balay 20085615d1e5SSatish Balay #undef __FUNC__ 2009d4bb536fSBarry Smith #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2010ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 2011ba4ca20aSSatish Balay { 2012ba4ca20aSSatish Balay static int called = 0; 2013ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 2014ba4ca20aSSatish Balay 2015ba4ca20aSSatish Balay if (called) return 0; else called = 1; 2016ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2017ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 2018ba4ca20aSSatish Balay return 0; 2019ba4ca20aSSatish Balay } 2020d9b7c43dSSatish Balay 20212593348eSBarry Smith /* -------------------------------------------------------------------*/ 2022cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 20239867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2024f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2025faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 20261a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 2027b6490206SBarry Smith 0,0, 2028de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 2029b6490206SBarry Smith 0, 2030f2501298SSatish Balay MatTranspose_SeqBAIJ, 203117e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 20329867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2033584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 2034b6490206SBarry Smith 0, 2035d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 20367fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2037b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2038de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 2039d402145bSBarry Smith 0,0, 2040b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 2041b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 2042af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 20437e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 2044ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 20453b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 20463b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 204792c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 204892c4ed94SBarry Smith 0,0,0,0,0,0, 204992c4ed94SBarry Smith MatSetValuesBlocked_SeqBAIJ}; 20502593348eSBarry Smith 20515615d1e5SSatish Balay #undef __FUNC__ 20525615d1e5SSatish Balay #define __FUNC__ "MatCreateSeqBAIJ" 20532593348eSBarry Smith /*@C 205444cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 205544cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 205644cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 20572bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 20582bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 20592593348eSBarry Smith 20602593348eSBarry Smith Input Parameters: 2061029af93fSBarry Smith . comm - MPI communicator, set to PETSC_COMM_SELF 2062b6490206SBarry Smith . bs - size of block 20632593348eSBarry Smith . m - number of rows 20642593348eSBarry Smith . n - number of columns 2065b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 20662bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 20672bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 20682593348eSBarry Smith 20692593348eSBarry Smith Output Parameter: 20702593348eSBarry Smith . A - the matrix 20712593348eSBarry Smith 20720513a670SBarry Smith Options Database Keys: 20730513a670SBarry Smith $ -mat_no_unroll - uses code that does not unroll the loops in the 20740effe34fSLois Curfman McInnes $ block calculations (much slower) 20750513a670SBarry Smith $ -mat_block_size - size of the blocks to use 20760513a670SBarry Smith 20772593348eSBarry Smith Notes: 207844cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 20792593348eSBarry Smith storage. That is, the stored row and column indices can begin at 208044cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 20812593348eSBarry Smith 20822593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 20832593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 20842593348eSBarry Smith allocation. For additional details, see the users manual chapter on 20856da5968aSLois Curfman McInnes matrices. 20862593348eSBarry Smith 208744cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 20882593348eSBarry Smith @*/ 2089b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 20902593348eSBarry Smith { 20912593348eSBarry Smith Mat B; 2092b6490206SBarry Smith Mat_SeqBAIJ *b; 20933b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 20943b2fbd54SBarry Smith 20953b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 2096e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2097b6490206SBarry Smith 20980513a670SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 20990513a670SBarry Smith 2100f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 2101e3372554SBarry Smith SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 21022593348eSBarry Smith 21032593348eSBarry Smith *A = 0; 2104d4bb536fSBarry Smith PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 21052593348eSBarry Smith PLogObjectCreate(B); 2106b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 210744cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 21082593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 21091a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 21101a6a6d98SBarry Smith if (!flg) { 21117fc0212eSBarry Smith switch (bs) { 21127fc0212eSBarry Smith case 1: 21137fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21141a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 211539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 2116f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 21177fc0212eSBarry Smith break; 21184eeb42bcSBarry Smith case 2: 21194eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 21201a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 212139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 2122f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 21236d84be18SBarry Smith break; 2124f327dd97SBarry Smith case 3: 2125f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 21261a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 212739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 2128f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 21294eeb42bcSBarry Smith break; 21306d84be18SBarry Smith case 4: 21316d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 21321a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 213339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 2134f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 21356d84be18SBarry Smith break; 21366d84be18SBarry Smith case 5: 21376d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 21381a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 213939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 2140f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 21416d84be18SBarry Smith break; 214248e9ddb2SSatish Balay case 7: 214348e9ddb2SSatish Balay B->ops.mult = MatMult_SeqBAIJ_7; 214448e9ddb2SSatish Balay B->ops.solve = MatSolve_SeqBAIJ_7; 214548e9ddb2SSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_7; 214648e9ddb2SSatish Balay break; 21477fc0212eSBarry Smith } 21481a6a6d98SBarry Smith } 2149b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 2150b6490206SBarry Smith B->view = MatView_SeqBAIJ; 21512593348eSBarry Smith B->factor = 0; 21522593348eSBarry Smith B->lupivotthreshold = 1.0; 215390f02eecSBarry Smith B->mapping = 0; 21542593348eSBarry Smith b->row = 0; 21552593348eSBarry Smith b->col = 0; 21562593348eSBarry Smith b->reallocs = 0; 21572593348eSBarry Smith 215844cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 215944cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 2160b6490206SBarry Smith b->mbs = mbs; 2161f2501298SSatish Balay b->nbs = nbs; 2162b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 21632593348eSBarry Smith if (nnz == PETSC_NULL) { 2164119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 21652593348eSBarry Smith else if (nz <= 0) nz = 1; 2166b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2167b6490206SBarry Smith nz = nz*mbs; 21682593348eSBarry Smith } 21692593348eSBarry Smith else { 21702593348eSBarry Smith nz = 0; 2171b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 21722593348eSBarry Smith } 21732593348eSBarry Smith 21742593348eSBarry Smith /* allocate the matrix space */ 21757e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 21762593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 21777e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 21787e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 21792593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 21802593348eSBarry Smith b->i = b->j + nz; 21812593348eSBarry Smith b->singlemalloc = 1; 21822593348eSBarry Smith 2183de6a44a3SBarry Smith b->i[0] = 0; 2184b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 21852593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 21862593348eSBarry Smith } 21872593348eSBarry Smith 2188b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 2189b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2190f09e8eb9SSatish Balay PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2191b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 21922593348eSBarry Smith 2193b6490206SBarry Smith b->bs = bs; 21949df24120SSatish Balay b->bs2 = bs2; 2195b6490206SBarry Smith b->mbs = mbs; 21962593348eSBarry Smith b->nz = 0; 219718e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 21982593348eSBarry Smith b->sorted = 0; 21992593348eSBarry Smith b->roworiented = 1; 22002593348eSBarry Smith b->nonew = 0; 22012593348eSBarry Smith b->diag = 0; 22022593348eSBarry Smith b->solve_work = 0; 2203de6a44a3SBarry Smith b->mult_work = 0; 22042593348eSBarry Smith b->spptr = 0; 22054e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 22064e220ebcSLois Curfman McInnes 22072593348eSBarry Smith *A = B; 22082593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 22092593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 22102593348eSBarry Smith return 0; 22112593348eSBarry Smith } 22122593348eSBarry Smith 22135615d1e5SSatish Balay #undef __FUNC__ 22145615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2215b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 22162593348eSBarry Smith { 22172593348eSBarry Smith Mat C; 2218b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 22199df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2220de6a44a3SBarry Smith 2221e3372554SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 22222593348eSBarry Smith 22232593348eSBarry Smith *B = 0; 2224d4bb536fSBarry Smith PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 22252593348eSBarry Smith PLogObjectCreate(C); 2226b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 22272593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2228b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 2229b6490206SBarry Smith C->view = MatView_SeqBAIJ; 22302593348eSBarry Smith C->factor = A->factor; 22312593348eSBarry Smith c->row = 0; 22322593348eSBarry Smith c->col = 0; 22332593348eSBarry Smith C->assembled = PETSC_TRUE; 22342593348eSBarry Smith 223544cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 223644cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 223744cd7ae7SLois Curfman McInnes C->M = a->m; 223844cd7ae7SLois Curfman McInnes C->N = a->n; 223944cd7ae7SLois Curfman McInnes 2240b6490206SBarry Smith c->bs = a->bs; 22419df24120SSatish Balay c->bs2 = a->bs2; 2242b6490206SBarry Smith c->mbs = a->mbs; 2243b6490206SBarry Smith c->nbs = a->nbs; 22442593348eSBarry Smith 2245b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2246b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2247b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22482593348eSBarry Smith c->imax[i] = a->imax[i]; 22492593348eSBarry Smith c->ilen[i] = a->ilen[i]; 22502593348eSBarry Smith } 22512593348eSBarry Smith 22522593348eSBarry Smith /* allocate the matrix space */ 22532593348eSBarry Smith c->singlemalloc = 1; 22547e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 22552593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 22567e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 2257de6a44a3SBarry Smith c->i = c->j + nz; 2258b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2259b6490206SBarry Smith if (mbs > 0) { 2260de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 22612593348eSBarry Smith if (cpvalues == COPY_VALUES) { 22627e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 22632593348eSBarry Smith } 22642593348eSBarry Smith } 22652593348eSBarry Smith 2266f09e8eb9SSatish Balay PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 22672593348eSBarry Smith c->sorted = a->sorted; 22682593348eSBarry Smith c->roworiented = a->roworiented; 22692593348eSBarry Smith c->nonew = a->nonew; 22702593348eSBarry Smith 22712593348eSBarry Smith if (a->diag) { 2272b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2273b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2274b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 22752593348eSBarry Smith c->diag[i] = a->diag[i]; 22762593348eSBarry Smith } 22772593348eSBarry Smith } 22782593348eSBarry Smith else c->diag = 0; 22792593348eSBarry Smith c->nz = a->nz; 22802593348eSBarry Smith c->maxnz = a->maxnz; 22812593348eSBarry Smith c->solve_work = 0; 22822593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 22837fc0212eSBarry Smith c->mult_work = 0; 22842593348eSBarry Smith *B = C; 22852593348eSBarry Smith return 0; 22862593348eSBarry Smith } 22872593348eSBarry Smith 22885615d1e5SSatish Balay #undef __FUNC__ 22895615d1e5SSatish Balay #define __FUNC__ "MatLoad_SeqBAIJ" 229019bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 22912593348eSBarry Smith { 2292b6490206SBarry Smith Mat_SeqBAIJ *a; 22932593348eSBarry Smith Mat B; 2294de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2295b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 229635aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2297a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 2298b6490206SBarry Smith Scalar *aa; 229919bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 23002593348eSBarry Smith 23011a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2302de6a44a3SBarry Smith bs2 = bs*bs; 2303b6490206SBarry Smith 23042593348eSBarry Smith MPI_Comm_size(comm,&size); 2305e3372554SBarry Smith if (size > 1) SETERRQ(1,0,"view must have one processor"); 230690ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 23070752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2308e3372554SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 23092593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 23102593348eSBarry Smith 2311e3372554SBarry Smith if (M != N) SETERRQ(1,0,"Can only do square matrices"); 231235aab85fSBarry Smith 231335aab85fSBarry Smith /* 231435aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 231535aab85fSBarry Smith divisible by the blocksize 231635aab85fSBarry Smith */ 2317b6490206SBarry Smith mbs = M/bs; 231835aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 231935aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 232035aab85fSBarry Smith else mbs++; 232135aab85fSBarry Smith if (extra_rows) { 2322537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 232335aab85fSBarry Smith } 2324b6490206SBarry Smith 23252593348eSBarry Smith /* read in row lengths */ 232635aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 23270752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 232835aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 23292593348eSBarry Smith 2330b6490206SBarry Smith /* read in column indices */ 233135aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 23320752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 233335aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2334b6490206SBarry Smith 2335b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 2336b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2337b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 233835aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 233935aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 234035aab85fSBarry Smith masked = mask + mbs; 2341b6490206SBarry Smith rowcount = 0; nzcount = 0; 2342b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 234335aab85fSBarry Smith nmask = 0; 2344b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2345b6490206SBarry Smith kmax = rowlengths[rowcount]; 2346b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 234735aab85fSBarry Smith tmp = jj[nzcount++]/bs; 234835aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2349b6490206SBarry Smith } 2350b6490206SBarry Smith rowcount++; 2351b6490206SBarry Smith } 235235aab85fSBarry Smith browlengths[i] += nmask; 235335aab85fSBarry Smith /* zero out the mask elements we set */ 235435aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2355b6490206SBarry Smith } 2356b6490206SBarry Smith 23572593348eSBarry Smith /* create our matrix */ 23583eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 23592593348eSBarry Smith B = *A; 2360b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 23612593348eSBarry Smith 23622593348eSBarry Smith /* set matrix "i" values */ 2363de6a44a3SBarry Smith a->i[0] = 0; 2364b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 2365b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2366b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 23672593348eSBarry Smith } 2368b6490206SBarry Smith a->nz = 0; 2369b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 23702593348eSBarry Smith 2371b6490206SBarry Smith /* read in nonzero values */ 237235aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 23730752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 237435aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2375b6490206SBarry Smith 2376b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2377b6490206SBarry Smith nzcount = 0; jcount = 0; 2378b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2379b6490206SBarry Smith nzcountb = nzcount; 238035aab85fSBarry Smith nmask = 0; 2381b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2382b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2383b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 238435aab85fSBarry Smith tmp = jj[nzcount++]/bs; 238535aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2386b6490206SBarry Smith } 2387b6490206SBarry Smith rowcount++; 2388b6490206SBarry Smith } 2389de6a44a3SBarry Smith /* sort the masked values */ 239077c4ece6SBarry Smith PetscSortInt(nmask,masked); 2391de6a44a3SBarry Smith 2392b6490206SBarry Smith /* set "j" values into matrix */ 2393b6490206SBarry Smith maskcount = 1; 239435aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 239535aab85fSBarry Smith a->j[jcount++] = masked[j]; 2396de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2397b6490206SBarry Smith } 2398b6490206SBarry Smith /* set "a" values into matrix */ 2399de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2400b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2401b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2402b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2403de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2404de6a44a3SBarry Smith block = mask[tmp] - 1; 2405de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2406de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2407b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2408b6490206SBarry Smith } 2409b6490206SBarry Smith } 241035aab85fSBarry Smith /* zero out the mask elements we set */ 241135aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2412b6490206SBarry Smith } 2413e3372554SBarry Smith if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2414b6490206SBarry Smith 2415b6490206SBarry Smith PetscFree(rowlengths); 2416b6490206SBarry Smith PetscFree(browlengths); 2417b6490206SBarry Smith PetscFree(aa); 2418b6490206SBarry Smith PetscFree(jj); 2419b6490206SBarry Smith PetscFree(mask); 2420b6490206SBarry Smith 2421b6490206SBarry Smith B->assembled = PETSC_TRUE; 2422de6a44a3SBarry Smith 2423*9c01be13SBarry Smith ierr = MatView_Private(B); CHKERRQ(ierr); 24242593348eSBarry Smith return 0; 24252593348eSBarry Smith } 24262593348eSBarry Smith 24272593348eSBarry Smith 24282593348eSBarry Smith 2429