12593348eSBarry Smith #ifndef lint 2*e2f3b5e9SSatish Balay static char vcid[] = "$Id: baij.c,v 1.76 1996/11/30 20:35:21 curfman Exp balay $"; 32593348eSBarry Smith #endif 42593348eSBarry Smith 52593348eSBarry Smith /* 6b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 72593348eSBarry Smith matrix storage format. 82593348eSBarry Smith */ 970f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 101a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 111a6a6d98SBarry Smith #include "src/inline/spops.h" 122593348eSBarry Smith #include "petsc.h" 133b547af2SSatish Balay 142593348eSBarry Smith 15de6a44a3SBarry Smith /* 16de6a44a3SBarry Smith Adds diagonal pointers to sparse matrix structure. 17de6a44a3SBarry Smith */ 18de6a44a3SBarry Smith 19de6a44a3SBarry Smith int MatMarkDiag_SeqBAIJ(Mat A) 20de6a44a3SBarry Smith { 21de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 227fc0212eSBarry Smith int i,j, *diag, m = a->mbs; 23de6a44a3SBarry Smith 24de6a44a3SBarry Smith diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 25de6a44a3SBarry Smith PLogObjectMemory(A,(m+1)*sizeof(int)); 267fc0212eSBarry Smith for ( i=0; i<m; i++ ) { 27de6a44a3SBarry Smith for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 28de6a44a3SBarry Smith if (a->j[j] == i) { 29de6a44a3SBarry Smith diag[i] = j; 30de6a44a3SBarry Smith break; 31de6a44a3SBarry Smith } 32de6a44a3SBarry Smith } 33de6a44a3SBarry Smith } 34de6a44a3SBarry Smith a->diag = diag; 35de6a44a3SBarry Smith return 0; 36de6a44a3SBarry Smith } 372593348eSBarry Smith 382593348eSBarry Smith #include "draw.h" 392593348eSBarry Smith #include "pinclude/pviewer.h" 4077c4ece6SBarry Smith #include "sys.h" 412593348eSBarry Smith 423b2fbd54SBarry Smith extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 433b2fbd54SBarry Smith 443b2fbd54SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 453b2fbd54SBarry Smith PetscTruth *done) 463b2fbd54SBarry Smith { 473b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 483b2fbd54SBarry Smith int ierr, n = a->mbs,i; 493b2fbd54SBarry Smith 503b2fbd54SBarry Smith *nn = n; 513b2fbd54SBarry Smith if (!ia) return 0; 523b2fbd54SBarry Smith if (symmetric) { 533b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 543b2fbd54SBarry Smith } else if (oshift == 1) { 553b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 563b2fbd54SBarry Smith int nz = a->i[n] + 1; 573b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]++; 583b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]++; 593b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 603b2fbd54SBarry Smith } else { 613b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 623b2fbd54SBarry Smith } 633b2fbd54SBarry Smith 643b2fbd54SBarry Smith return 0; 653b2fbd54SBarry Smith } 663b2fbd54SBarry Smith 673b2fbd54SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 683b2fbd54SBarry Smith PetscTruth *done) 693b2fbd54SBarry Smith { 703b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 713b2fbd54SBarry Smith int i,n = a->mbs; 723b2fbd54SBarry Smith 733b2fbd54SBarry Smith if (!ia) return 0; 743b2fbd54SBarry Smith if (symmetric) { 753b2fbd54SBarry Smith PetscFree(*ia); 763b2fbd54SBarry Smith PetscFree(*ja); 77af5da2bfSBarry Smith } else if (oshift == 1) { 783b2fbd54SBarry Smith int nz = a->i[n]; 793b2fbd54SBarry Smith for ( i=0; i<nz; i++ ) a->j[i]--; 803b2fbd54SBarry Smith for ( i=0; i<n+1; i++ ) a->i[i]--; 813b2fbd54SBarry Smith } 823b2fbd54SBarry Smith return 0; 833b2fbd54SBarry Smith } 843b2fbd54SBarry Smith 853b2fbd54SBarry Smith 86b6490206SBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 872593348eSBarry Smith { 88b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 899df24120SSatish Balay int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 90b6490206SBarry Smith Scalar *aa; 912593348eSBarry Smith 9290ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 932593348eSBarry Smith col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 942593348eSBarry Smith col_lens[0] = MAT_COOKIE; 953b2fbd54SBarry Smith 962593348eSBarry Smith col_lens[1] = a->m; 972593348eSBarry Smith col_lens[2] = a->n; 987e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 992593348eSBarry Smith 1002593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 101b6490206SBarry Smith count = 0; 102b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 103b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 104b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 105b6490206SBarry Smith } 1062593348eSBarry Smith } 10777c4ece6SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 1082593348eSBarry Smith PetscFree(col_lens); 1092593348eSBarry Smith 1102593348eSBarry Smith /* store column indices (zero start index) */ 1117e67e3f9SSatish Balay jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 112b6490206SBarry Smith count = 0; 113b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 114b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 115b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 116b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 117b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 1182593348eSBarry Smith } 1192593348eSBarry Smith } 120b6490206SBarry Smith } 121b6490206SBarry Smith } 1227e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 123b6490206SBarry Smith PetscFree(jj); 1242593348eSBarry Smith 1252593348eSBarry Smith /* store nonzero values */ 1267e67e3f9SSatish Balay aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 127b6490206SBarry Smith count = 0; 128b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 129b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 130b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 131b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 1327e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 133b6490206SBarry Smith } 134b6490206SBarry Smith } 135b6490206SBarry Smith } 136b6490206SBarry Smith } 1377e67e3f9SSatish Balay ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 138b6490206SBarry Smith PetscFree(aa); 1392593348eSBarry Smith return 0; 1402593348eSBarry Smith } 1412593348eSBarry Smith 142b6490206SBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 1432593348eSBarry Smith { 144b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1459df24120SSatish Balay int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 1462593348eSBarry Smith FILE *fd; 1472593348eSBarry Smith char *outputname; 1482593348eSBarry Smith 14990ace30eSBarry Smith ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1502593348eSBarry Smith ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 15190ace30eSBarry Smith ierr = ViewerGetFormat(viewer,&format); 152639f9d9dSBarry Smith if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1537ddc982cSLois Curfman McInnes fprintf(fd," block size is %d\n",bs); 1542593348eSBarry Smith } 155639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 156b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 1572593348eSBarry Smith } 158639f9d9dSBarry Smith else if (format == VIEWER_FORMAT_ASCII_COMMON) { 15944cd7ae7SLois Curfman McInnes for ( i=0; i<a->mbs; i++ ) { 16044cd7ae7SLois Curfman McInnes for ( j=0; j<bs; j++ ) { 16144cd7ae7SLois Curfman McInnes fprintf(fd,"row %d:",i*bs+j); 16244cd7ae7SLois Curfman McInnes for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 16344cd7ae7SLois Curfman McInnes for ( l=0; l<bs; l++ ) { 16444cd7ae7SLois Curfman McInnes #if defined(PETSC_COMPLEX) 16544cd7ae7SLois Curfman McInnes if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 16644cd7ae7SLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 16744cd7ae7SLois Curfman McInnes real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 16844cd7ae7SLois Curfman McInnes else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 16944cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 17044cd7ae7SLois Curfman McInnes #else 17144cd7ae7SLois Curfman McInnes if (a->a[bs2*k + l*bs + j] != 0.0) 17244cd7ae7SLois Curfman McInnes fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 17344cd7ae7SLois Curfman McInnes #endif 17444cd7ae7SLois Curfman McInnes } 17544cd7ae7SLois Curfman McInnes } 17644cd7ae7SLois Curfman McInnes fprintf(fd,"\n"); 17744cd7ae7SLois Curfman McInnes } 17844cd7ae7SLois Curfman McInnes } 17944cd7ae7SLois Curfman McInnes } 1802593348eSBarry Smith else { 181b6490206SBarry Smith for ( i=0; i<a->mbs; i++ ) { 182b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 183b6490206SBarry Smith fprintf(fd,"row %d:",i*bs+j); 184b6490206SBarry Smith for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 185b6490206SBarry Smith for ( l=0; l<bs; l++ ) { 18688685aaeSLois Curfman McInnes #if defined(PETSC_COMPLEX) 1877e67e3f9SSatish Balay if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 18888685aaeSLois Curfman McInnes fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 1897e67e3f9SSatish Balay real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 19088685aaeSLois Curfman McInnes } 19188685aaeSLois Curfman McInnes else { 1927e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 19388685aaeSLois Curfman McInnes } 19488685aaeSLois Curfman McInnes #else 1957e67e3f9SSatish Balay fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 19688685aaeSLois Curfman McInnes #endif 1972593348eSBarry Smith } 1982593348eSBarry Smith } 1992593348eSBarry Smith fprintf(fd,"\n"); 2002593348eSBarry Smith } 2012593348eSBarry Smith } 202b6490206SBarry Smith } 2032593348eSBarry Smith fflush(fd); 2042593348eSBarry Smith return 0; 2052593348eSBarry Smith } 2062593348eSBarry Smith 2073270192aSSatish Balay static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 2083270192aSSatish Balay { 2093270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 2103270192aSSatish Balay int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 2113270192aSSatish Balay double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 2123270192aSSatish Balay Scalar *aa; 2133270192aSSatish Balay Draw draw; 2143270192aSSatish Balay DrawButton button; 2153270192aSSatish Balay PetscTruth isnull; 2163270192aSSatish Balay 2173270192aSSatish Balay ViewerDrawGetDraw(viewer,&draw); 2183270192aSSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 2193270192aSSatish Balay 2203270192aSSatish Balay xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 2213270192aSSatish Balay xr += w; yr += h; xl = -w; yl = -h; 2223270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2233270192aSSatish Balay /* loop over matrix elements drawing boxes */ 2243270192aSSatish Balay color = DRAW_BLUE; 2253270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2263270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2273270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2283270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2293270192aSSatish Balay aa = a->a + j*bs2; 2303270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2313270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2323270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 2333270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2343270192aSSatish Balay } 2353270192aSSatish Balay } 2363270192aSSatish Balay } 2373270192aSSatish Balay } 2383270192aSSatish Balay color = DRAW_CYAN; 2393270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2403270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2413270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2423270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2433270192aSSatish Balay aa = a->a + j*bs2; 2443270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2453270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2463270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 2473270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2483270192aSSatish Balay } 2493270192aSSatish Balay } 2503270192aSSatish Balay } 2513270192aSSatish Balay } 2523270192aSSatish Balay 2533270192aSSatish Balay color = DRAW_RED; 2543270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2553270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2563270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2573270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2583270192aSSatish Balay aa = a->a + j*bs2; 2593270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2603270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2613270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 2623270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2633270192aSSatish Balay } 2643270192aSSatish Balay } 2653270192aSSatish Balay } 2663270192aSSatish Balay } 2673270192aSSatish Balay 2683270192aSSatish Balay DrawFlush(draw); 2693270192aSSatish Balay DrawGetPause(draw,&pause); 2703270192aSSatish Balay if (pause >= 0) { PetscSleep(pause); return 0;} 2713270192aSSatish Balay 2723270192aSSatish Balay /* allow the matrix to zoom or shrink */ 2733270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 2743270192aSSatish Balay while (button != BUTTON_RIGHT) { 2753270192aSSatish Balay DrawClear(draw); 2763270192aSSatish Balay if (button == BUTTON_LEFT) scale = .5; 2773270192aSSatish Balay else if (button == BUTTON_CENTER) scale = 2.; 2783270192aSSatish Balay xl = scale*(xl + w - xc) + xc - w*scale; 2793270192aSSatish Balay xr = scale*(xr - w - xc) + xc + w*scale; 2803270192aSSatish Balay yl = scale*(yl + h - yc) + yc - h*scale; 2813270192aSSatish Balay yr = scale*(yr - h - yc) + yc + h*scale; 2823270192aSSatish Balay w *= scale; h *= scale; 2833270192aSSatish Balay ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 2843270192aSSatish Balay color = DRAW_BLUE; 2853270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 2863270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 2873270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 2883270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 2893270192aSSatish Balay aa = a->a + j*bs2; 2903270192aSSatish Balay for ( k=0; k<bs; k++ ) { 2913270192aSSatish Balay for ( l=0; l<bs; l++ ) { 2923270192aSSatish Balay if (PetscReal(*aa++) >= 0.) continue; 2933270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 2943270192aSSatish Balay } 2953270192aSSatish Balay } 2963270192aSSatish Balay } 2973270192aSSatish Balay } 2983270192aSSatish Balay color = DRAW_CYAN; 2993270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3003270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3013270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3023270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3033270192aSSatish Balay aa = a->a + j*bs2; 3043270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3053270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3063270192aSSatish Balay if (PetscReal(*aa++) != 0.) continue; 3073270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3083270192aSSatish Balay } 3093270192aSSatish Balay } 3103270192aSSatish Balay } 3113270192aSSatish Balay } 3123270192aSSatish Balay 3133270192aSSatish Balay color = DRAW_RED; 3143270192aSSatish Balay for ( i=0,row=0; i<mbs; i++,row+=bs ) { 3153270192aSSatish Balay for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 3163270192aSSatish Balay y_l = a->m - row - 1.0; y_r = y_l + 1.0; 3173270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 3183270192aSSatish Balay aa = a->a + j*bs2; 3193270192aSSatish Balay for ( k=0; k<bs; k++ ) { 3203270192aSSatish Balay for ( l=0; l<bs; l++ ) { 3213270192aSSatish Balay if (PetscReal(*aa++) <= 0.) continue; 3223270192aSSatish Balay DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 3233270192aSSatish Balay } 3243270192aSSatish Balay } 3253270192aSSatish Balay } 3263270192aSSatish Balay } 3273270192aSSatish Balay ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 3283270192aSSatish Balay } 3293270192aSSatish Balay return 0; 3303270192aSSatish Balay } 3313270192aSSatish Balay 332b6490206SBarry Smith static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 3332593348eSBarry Smith { 3342593348eSBarry Smith Mat A = (Mat) obj; 33519bcc07fSBarry Smith ViewerType vtype; 33619bcc07fSBarry Smith int ierr; 3372593348eSBarry Smith 33819bcc07fSBarry Smith ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 33919bcc07fSBarry Smith if (vtype == MATLAB_VIEWER) { 340b6490206SBarry Smith SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 3412593348eSBarry Smith } 34219bcc07fSBarry Smith else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 343b6490206SBarry Smith return MatView_SeqBAIJ_ASCII(A,viewer); 3442593348eSBarry Smith } 34519bcc07fSBarry Smith else if (vtype == BINARY_FILE_VIEWER) { 346b6490206SBarry Smith return MatView_SeqBAIJ_Binary(A,viewer); 3472593348eSBarry Smith } 34819bcc07fSBarry Smith else if (vtype == DRAW_VIEWER) { 3493270192aSSatish Balay return MatView_SeqBAIJ_Draw(A,viewer); 3502593348eSBarry Smith } 3512593348eSBarry Smith return 0; 3522593348eSBarry Smith } 353b6490206SBarry Smith 354119a934aSSatish Balay #define CHUNKSIZE 10 355cd0e1443SSatish Balay 356cd0e1443SSatish Balay /* This version has row oriented v */ 357639f9d9dSBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 358cd0e1443SSatish Balay { 359cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360cd0e1443SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 361cd0e1443SSatish Balay int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 362a7c10996SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 3639df24120SSatish Balay int ridx,cidx,bs2=a->bs2; 364cd0e1443SSatish Balay Scalar *ap,value,*aa=a->a,*bap; 365cd0e1443SSatish Balay 366cd0e1443SSatish Balay for ( k=0; k<m; k++ ) { /* loop over added rows */ 367cd0e1443SSatish Balay row = im[k]; brow = row/bs; 3683b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 369cd0e1443SSatish Balay if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 370cd0e1443SSatish Balay if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 3713b2fbd54SBarry Smith #endif 372a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 373cd0e1443SSatish Balay rmax = imax[brow]; nrow = ailen[brow]; 374cd0e1443SSatish Balay low = 0; 375cd0e1443SSatish Balay for ( l=0; l<n; l++ ) { /* loop over added columns */ 3763b2fbd54SBarry Smith #if defined(PETSC_BOPT_g) 377cd0e1443SSatish Balay if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 378cd0e1443SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 3793b2fbd54SBarry Smith #endif 380a7c10996SSatish Balay col = in[l]; bcol = col/bs; 381cd0e1443SSatish Balay ridx = row % bs; cidx = col % bs; 382cd0e1443SSatish Balay if (roworiented) { 383cd0e1443SSatish Balay value = *v++; 384cd0e1443SSatish Balay } 385cd0e1443SSatish Balay else { 386cd0e1443SSatish Balay value = v[k + l*m]; 387cd0e1443SSatish Balay } 388cd0e1443SSatish Balay if (!sorted) low = 0; high = nrow; 389cd0e1443SSatish Balay while (high-low > 5) { 390cd0e1443SSatish Balay t = (low+high)/2; 391cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 392cd0e1443SSatish Balay else low = t; 393cd0e1443SSatish Balay } 394cd0e1443SSatish Balay for ( i=low; i<high; i++ ) { 395cd0e1443SSatish Balay if (rp[i] > bcol) break; 396cd0e1443SSatish Balay if (rp[i] == bcol) { 3977e67e3f9SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 398cd0e1443SSatish Balay if (is == ADD_VALUES) *bap += value; 399cd0e1443SSatish Balay else *bap = value; 400cd0e1443SSatish Balay goto noinsert; 401cd0e1443SSatish Balay } 402cd0e1443SSatish Balay } 403cd0e1443SSatish Balay if (nonew) goto noinsert; 404cd0e1443SSatish Balay if (nrow >= rmax) { 405cd0e1443SSatish Balay /* there is no extra room in row, therefore enlarge */ 406cd0e1443SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 407cd0e1443SSatish Balay Scalar *new_a; 408cd0e1443SSatish Balay 409cd0e1443SSatish Balay /* malloc new storage space */ 4107e67e3f9SSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 411cd0e1443SSatish Balay new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 4127e67e3f9SSatish Balay new_j = (int *) (new_a + bs2*new_nz); 413cd0e1443SSatish Balay new_i = new_j + new_nz; 414cd0e1443SSatish Balay 415cd0e1443SSatish Balay /* copy over old data into new slots */ 416cd0e1443SSatish Balay for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 4177e67e3f9SSatish Balay for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 418a7c10996SSatish Balay PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 419a7c10996SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 420a7c10996SSatish Balay PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 421cd0e1443SSatish Balay len*sizeof(int)); 422a7c10996SSatish Balay PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 423a7c10996SSatish Balay PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 424a7c10996SSatish Balay PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 425a7c10996SSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 426cd0e1443SSatish Balay /* free up old matrix storage */ 427cd0e1443SSatish Balay PetscFree(a->a); 428cd0e1443SSatish Balay if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 429cd0e1443SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 430cd0e1443SSatish Balay a->singlemalloc = 1; 431cd0e1443SSatish Balay 432a7c10996SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 433cd0e1443SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 4347e67e3f9SSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 43518e7b8e4SLois Curfman McInnes a->maxnz += bs2*CHUNKSIZE; 436cd0e1443SSatish Balay a->reallocs++; 437119a934aSSatish Balay a->nz++; 438cd0e1443SSatish Balay } 4397e67e3f9SSatish Balay N = nrow++ - 1; 440cd0e1443SSatish Balay /* shift up all the later entries in this row */ 441cd0e1443SSatish Balay for ( ii=N; ii>=i; ii-- ) { 442cd0e1443SSatish Balay rp[ii+1] = rp[ii]; 4437e67e3f9SSatish Balay PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 444cd0e1443SSatish Balay } 4457e67e3f9SSatish Balay if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 446cd0e1443SSatish Balay rp[i] = bcol; 4477e67e3f9SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 448cd0e1443SSatish Balay noinsert:; 449cd0e1443SSatish Balay low = i; 450cd0e1443SSatish Balay } 451cd0e1443SSatish Balay ailen[brow] = nrow; 452cd0e1443SSatish Balay } 453cd0e1443SSatish Balay return 0; 454cd0e1443SSatish Balay } 455cd0e1443SSatish Balay 4560b824a48SSatish Balay static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 4570b824a48SSatish Balay { 4580b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4590b824a48SSatish Balay *m = a->m; *n = a->n; 4600b824a48SSatish Balay return 0; 4610b824a48SSatish Balay } 4620b824a48SSatish Balay 4630b824a48SSatish Balay static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 4640b824a48SSatish Balay { 4650b824a48SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4660b824a48SSatish Balay *m = 0; *n = a->m; 4670b824a48SSatish Balay return 0; 4680b824a48SSatish Balay } 4690b824a48SSatish Balay 4709867e207SSatish Balay int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 4719867e207SSatish Balay { 4729867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 4737e67e3f9SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 4749867e207SSatish Balay Scalar *aa,*v_i,*aa_i; 4759867e207SSatish Balay 4769867e207SSatish Balay bs = a->bs; 4779867e207SSatish Balay ai = a->i; 4789867e207SSatish Balay aj = a->j; 4799867e207SSatish Balay aa = a->a; 4809df24120SSatish Balay bs2 = a->bs2; 4819867e207SSatish Balay 4829867e207SSatish Balay if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 4839867e207SSatish Balay 4849867e207SSatish Balay bn = row/bs; /* Block number */ 4859867e207SSatish Balay bp = row % bs; /* Block Position */ 4869867e207SSatish Balay M = ai[bn+1] - ai[bn]; 4879867e207SSatish Balay *nz = bs*M; 4889867e207SSatish Balay 4899867e207SSatish Balay if (v) { 4909867e207SSatish Balay *v = 0; 4919867e207SSatish Balay if (*nz) { 4929867e207SSatish Balay *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 4939867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 4949867e207SSatish Balay v_i = *v + i*bs; 4957e67e3f9SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 4967e67e3f9SSatish Balay for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 4979867e207SSatish Balay } 4989867e207SSatish Balay } 4999867e207SSatish Balay } 5009867e207SSatish Balay 5019867e207SSatish Balay if (idx) { 5029867e207SSatish Balay *idx = 0; 5039867e207SSatish Balay if (*nz) { 5049867e207SSatish Balay *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 5059867e207SSatish Balay for ( i=0; i<M; i++ ) { /* for each block in the block row */ 5069867e207SSatish Balay idx_i = *idx + i*bs; 5079867e207SSatish Balay itmp = bs*aj[ai[bn] + i]; 5089867e207SSatish Balay for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 5099867e207SSatish Balay } 5109867e207SSatish Balay } 5119867e207SSatish Balay } 5129867e207SSatish Balay return 0; 5139867e207SSatish Balay } 5149867e207SSatish Balay 5159867e207SSatish Balay int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 5169867e207SSatish Balay { 5179867e207SSatish Balay if (idx) {if (*idx) PetscFree(*idx);} 5189867e207SSatish Balay if (v) {if (*v) PetscFree(*v);} 5199867e207SSatish Balay return 0; 5209867e207SSatish Balay } 521b6490206SBarry Smith 522f2501298SSatish Balay static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 523f2501298SSatish Balay { 524f2501298SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 525f2501298SSatish Balay Mat C; 526f2501298SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 5279df24120SSatish Balay int *rows,*cols,bs2=a->bs2; 528f2501298SSatish Balay Scalar *array=a->a; 529f2501298SSatish Balay 530f2501298SSatish Balay if (B==PETSC_NULL && mbs!=nbs) 531f2501298SSatish Balay SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 532f2501298SSatish Balay col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 533f2501298SSatish Balay PetscMemzero(col,(1+nbs)*sizeof(int)); 534a7c10996SSatish Balay 535a7c10996SSatish Balay for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 536f2501298SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 537f2501298SSatish Balay PetscFree(col); 538f2501298SSatish Balay rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 539f2501298SSatish Balay cols = rows + bs; 540f2501298SSatish Balay for ( i=0; i<mbs; i++ ) { 541f2501298SSatish Balay cols[0] = i*bs; 542f2501298SSatish Balay for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 543f2501298SSatish Balay len = ai[i+1] - ai[i]; 544f2501298SSatish Balay for ( j=0; j<len; j++ ) { 545f2501298SSatish Balay rows[0] = (*aj++)*bs; 546f2501298SSatish Balay for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 547f2501298SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 548f2501298SSatish Balay array += bs2; 549f2501298SSatish Balay } 550f2501298SSatish Balay } 5511073c447SSatish Balay PetscFree(rows); 552f2501298SSatish Balay 5536d4a8577SBarry Smith ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 5546d4a8577SBarry Smith ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 555f2501298SSatish Balay 556f2501298SSatish Balay if (B != PETSC_NULL) { 557f2501298SSatish Balay *B = C; 558f2501298SSatish Balay } else { 559f2501298SSatish Balay /* This isn't really an in-place transpose */ 560f2501298SSatish Balay PetscFree(a->a); 561f2501298SSatish Balay if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 562f2501298SSatish Balay if (a->diag) PetscFree(a->diag); 563f2501298SSatish Balay if (a->ilen) PetscFree(a->ilen); 564f2501298SSatish Balay if (a->imax) PetscFree(a->imax); 565f2501298SSatish Balay if (a->solve_work) PetscFree(a->solve_work); 566f2501298SSatish Balay PetscFree(a); 567f2501298SSatish Balay PetscMemcpy(A,C,sizeof(struct _Mat)); 568f2501298SSatish Balay PetscHeaderDestroy(C); 569f2501298SSatish Balay } 570f2501298SSatish Balay return 0; 571f2501298SSatish Balay } 572f2501298SSatish Balay 573f2501298SSatish Balay 574584200bdSSatish Balay static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 575584200bdSSatish Balay { 576584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 577584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 578a7c10996SSatish Balay int m = a->m,*ip, N, *ailen = a->ilen; 5799df24120SSatish Balay int mbs = a->mbs, bs2 = a->bs2; 580584200bdSSatish Balay Scalar *aa = a->a, *ap; 581584200bdSSatish Balay 5826d4a8577SBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) return 0; 583584200bdSSatish Balay 584584200bdSSatish Balay for ( i=1; i<mbs; i++ ) { 585584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 586584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 587584200bdSSatish Balay if (fshift) { 588a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 589584200bdSSatish Balay N = ailen[i]; 590584200bdSSatish Balay for ( j=0; j<N; j++ ) { 591584200bdSSatish Balay ip[j-fshift] = ip[j]; 5927e67e3f9SSatish Balay PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 593584200bdSSatish Balay } 594584200bdSSatish Balay } 595584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 596584200bdSSatish Balay } 597584200bdSSatish Balay if (mbs) { 598584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 599584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 600584200bdSSatish Balay } 601584200bdSSatish Balay /* reset ilen and imax for each row */ 602584200bdSSatish Balay for ( i=0; i<mbs; i++ ) { 603584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 604584200bdSSatish Balay } 605a7c10996SSatish Balay a->nz = ai[mbs]; 606584200bdSSatish Balay 607584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 608584200bdSSatish Balay if (fshift && a->diag) { 609584200bdSSatish Balay PetscFree(a->diag); 610584200bdSSatish Balay PLogObjectMemory(A,-(m+1)*sizeof(int)); 611584200bdSSatish Balay a->diag = 0; 612584200bdSSatish Balay } 6133d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 614219d9a1aSLois Curfman McInnes m,a->n,a->bs,fshift*bs2,a->nz*bs2); 6153d2013a6SLois Curfman McInnes PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 616584200bdSSatish Balay a->reallocs); 617*e2f3b5e9SSatish Balay a->reallocs = 0; 6184e220ebcSLois Curfman McInnes A->info.nz_unneeded = (double)fshift*bs2; 6194e220ebcSLois Curfman McInnes 620584200bdSSatish Balay return 0; 621584200bdSSatish Balay } 622584200bdSSatish Balay 623b6490206SBarry Smith static int MatZeroEntries_SeqBAIJ(Mat A) 6242593348eSBarry Smith { 625b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6269df24120SSatish Balay PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 6272593348eSBarry Smith return 0; 6282593348eSBarry Smith } 6292593348eSBarry Smith 630b6490206SBarry Smith int MatDestroy_SeqBAIJ(PetscObject obj) 6312593348eSBarry Smith { 6322593348eSBarry Smith Mat A = (Mat) obj; 633b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 63490f02eecSBarry Smith int ierr; 6352593348eSBarry Smith 6362593348eSBarry Smith #if defined(PETSC_LOG) 6372593348eSBarry Smith PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 6382593348eSBarry Smith #endif 6392593348eSBarry Smith PetscFree(a->a); 6402593348eSBarry Smith if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 6412593348eSBarry Smith if (a->diag) PetscFree(a->diag); 6422593348eSBarry Smith if (a->ilen) PetscFree(a->ilen); 6432593348eSBarry Smith if (a->imax) PetscFree(a->imax); 6442593348eSBarry Smith if (a->solve_work) PetscFree(a->solve_work); 645de6a44a3SBarry Smith if (a->mult_work) PetscFree(a->mult_work); 6462593348eSBarry Smith PetscFree(a); 64790f02eecSBarry Smith if (A->mapping) { 64890f02eecSBarry Smith ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 64990f02eecSBarry Smith } 650f2655603SLois Curfman McInnes PLogObjectDestroy(A); 651f2655603SLois Curfman McInnes PetscHeaderDestroy(A); 6522593348eSBarry Smith return 0; 6532593348eSBarry Smith } 6542593348eSBarry Smith 655b6490206SBarry Smith static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 6562593348eSBarry Smith { 657b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 6586d4a8577SBarry Smith if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 6596d4a8577SBarry Smith else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 6606d4a8577SBarry Smith else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 661219d9a1aSLois Curfman McInnes else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 6626d4a8577SBarry Smith else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 6636d4a8577SBarry Smith else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 6646d4a8577SBarry Smith else if (op == MAT_ROWS_SORTED || 665219d9a1aSLois Curfman McInnes op == MAT_ROWS_UNSORTED || 6666d4a8577SBarry Smith op == MAT_SYMMETRIC || 6676d4a8577SBarry Smith op == MAT_STRUCTURALLY_SYMMETRIC || 66890f02eecSBarry Smith op == MAT_YES_NEW_DIAGONALS || 66990f02eecSBarry Smith op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 67094a424c1SBarry Smith PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 6716d4a8577SBarry Smith else if (op == MAT_NO_NEW_DIAGONALS) 6726d4a8577SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 6732593348eSBarry Smith else 674b6490206SBarry Smith {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 6752593348eSBarry Smith return 0; 6762593348eSBarry Smith } 6772593348eSBarry Smith 6782593348eSBarry Smith 6792593348eSBarry Smith /* -------------------------------------------------------*/ 6802593348eSBarry Smith /* Should check that shapes of vectors and matrices match */ 6812593348eSBarry Smith /* -------------------------------------------------------*/ 682b6490206SBarry Smith #include "pinclude/plapack.h" 683b6490206SBarry Smith 68439b95ed1SBarry Smith static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 6852593348eSBarry Smith { 686b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 68739b95ed1SBarry Smith register Scalar *x,*z,*v,sum; 688c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 6892593348eSBarry Smith 690c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 691c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 692b6490206SBarry Smith 693119a934aSSatish Balay idx = a->j; 694119a934aSSatish Balay v = a->a; 695119a934aSSatish Balay ii = a->i; 696119a934aSSatish Balay 697119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 698119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 699119a934aSSatish Balay sum = 0.0; 700119a934aSSatish Balay while (n--) sum += *v++ * x[*idx++]; 7011a6a6d98SBarry Smith z[i] = sum; 702119a934aSSatish Balay } 703c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 704c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 70539b95ed1SBarry Smith PLogFlops(2*a->nz - a->m); 70639b95ed1SBarry Smith return 0; 70739b95ed1SBarry Smith } 70839b95ed1SBarry Smith 70939b95ed1SBarry Smith static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 71039b95ed1SBarry Smith { 71139b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 71239b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2; 71339b95ed1SBarry Smith register Scalar x1,x2; 71439b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 715c16cb8f2SBarry Smith int j,n; 71639b95ed1SBarry Smith 717c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 718c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 71939b95ed1SBarry Smith 72039b95ed1SBarry Smith idx = a->j; 72139b95ed1SBarry Smith v = a->a; 72239b95ed1SBarry Smith ii = a->i; 72339b95ed1SBarry Smith 724119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 725119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 726119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; 727119a934aSSatish Balay for ( j=0; j<n; j++ ) { 728119a934aSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 729119a934aSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 730119a934aSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 731119a934aSSatish Balay v += 4; 732119a934aSSatish Balay } 7331a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; 734119a934aSSatish Balay z += 2; 735119a934aSSatish Balay } 736c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 737c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 73839b95ed1SBarry Smith PLogFlops(4*a->nz - a->m); 73939b95ed1SBarry Smith return 0; 74039b95ed1SBarry Smith } 74139b95ed1SBarry Smith 74239b95ed1SBarry Smith static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 74339b95ed1SBarry Smith { 74439b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 74539b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 746c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 74739b95ed1SBarry Smith 748c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 749c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 75039b95ed1SBarry Smith 75139b95ed1SBarry Smith idx = a->j; 75239b95ed1SBarry Smith v = a->a; 75339b95ed1SBarry Smith ii = a->i; 75439b95ed1SBarry Smith 755119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 756119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 757119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 758119a934aSSatish Balay for ( j=0; j<n; j++ ) { 759119a934aSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 760119a934aSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 761119a934aSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 762119a934aSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 763119a934aSSatish Balay v += 9; 764119a934aSSatish Balay } 7651a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; 766119a934aSSatish Balay z += 3; 767119a934aSSatish Balay } 768c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 769c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 77039b95ed1SBarry Smith PLogFlops(18*a->nz - a->m); 77139b95ed1SBarry Smith return 0; 77239b95ed1SBarry Smith } 77339b95ed1SBarry Smith 77439b95ed1SBarry Smith static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 77539b95ed1SBarry Smith { 77639b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77739b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 77839b95ed1SBarry Smith register Scalar x1,x2,x3,x4; 77939b95ed1SBarry Smith int mbs=a->mbs,i,*idx,*ii; 780c16cb8f2SBarry Smith int j,n; 78139b95ed1SBarry Smith 782c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 783c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 78439b95ed1SBarry Smith 78539b95ed1SBarry Smith idx = a->j; 78639b95ed1SBarry Smith v = a->a; 78739b95ed1SBarry Smith ii = a->i; 78839b95ed1SBarry Smith 789119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 790119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 791119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 792119a934aSSatish Balay for ( j=0; j<n; j++ ) { 793119a934aSSatish Balay xb = x + 4*(*idx++); 794119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 795119a934aSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 796119a934aSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 797119a934aSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 798119a934aSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 799119a934aSSatish Balay v += 16; 800119a934aSSatish Balay } 8011a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 802119a934aSSatish Balay z += 4; 803119a934aSSatish Balay } 804c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 805c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 80639b95ed1SBarry Smith PLogFlops(32*a->nz - a->m); 80739b95ed1SBarry Smith return 0; 80839b95ed1SBarry Smith } 80939b95ed1SBarry Smith 81039b95ed1SBarry Smith static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 81139b95ed1SBarry Smith { 81239b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 81339b95ed1SBarry Smith register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 81439b95ed1SBarry Smith register Scalar x1,x2,x3,x4,x5; 815c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 81639b95ed1SBarry Smith 817c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 818c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 81939b95ed1SBarry Smith 82039b95ed1SBarry Smith idx = a->j; 82139b95ed1SBarry Smith v = a->a; 82239b95ed1SBarry Smith ii = a->i; 82339b95ed1SBarry Smith 824119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 825119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 826119a934aSSatish Balay sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 827119a934aSSatish Balay for ( j=0; j<n; j++ ) { 828119a934aSSatish Balay xb = x + 5*(*idx++); 829119a934aSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 830119a934aSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 831119a934aSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 832119a934aSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 833119a934aSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 834119a934aSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 835119a934aSSatish Balay v += 25; 836119a934aSSatish Balay } 8371a6a6d98SBarry Smith z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 838119a934aSSatish Balay z += 5; 839119a934aSSatish Balay } 840c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 841c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 84239b95ed1SBarry Smith PLogFlops(50*a->nz - a->m); 84339b95ed1SBarry Smith return 0; 84439b95ed1SBarry Smith } 84539b95ed1SBarry Smith 84639b95ed1SBarry Smith static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 84739b95ed1SBarry Smith { 84839b95ed1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 84939b95ed1SBarry Smith register Scalar *x,*z,*v,*xb; 850c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 851c16cb8f2SBarry Smith int _One = 1,ncols,k; 852c16cb8f2SBarry Smith Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 85339b95ed1SBarry Smith 854c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 855c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 85639b95ed1SBarry Smith 85739b95ed1SBarry Smith idx = a->j; 85839b95ed1SBarry Smith v = a->a; 85939b95ed1SBarry Smith ii = a->i; 86039b95ed1SBarry Smith 86139b95ed1SBarry Smith 862119a934aSSatish Balay if (!a->mult_work) { 8633b547af2SSatish Balay k = PetscMax(a->m,a->n); 8642b3076dcSSatish Balay a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 865119a934aSSatish Balay } 866119a934aSSatish Balay work = a->mult_work; 867119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 868119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 869119a934aSSatish Balay ncols = n*bs; 870119a934aSSatish Balay workt = work; 871119a934aSSatish Balay for ( j=0; j<n; j++ ) { 872119a934aSSatish Balay xb = x + bs*(*idx++); 873119a934aSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 874119a934aSSatish Balay workt += bs; 875119a934aSSatish Balay } 8761a6a6d98SBarry Smith LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 877119a934aSSatish Balay v += n*bs2; 878119a934aSSatish Balay z += bs; 879119a934aSSatish Balay } 880c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 881c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 8821a6a6d98SBarry Smith PLogFlops(2*a->nz*bs2 - a->m); 883bb42667fSSatish Balay return 0; 884bb42667fSSatish Balay } 885bb42667fSSatish Balay 886f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 887f44d3a6dSSatish Balay { 888f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 889f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,sum; 890c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,n; 891f44d3a6dSSatish Balay 892c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 893c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 894c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 895f44d3a6dSSatish Balay 896f44d3a6dSSatish Balay idx = a->j; 897f44d3a6dSSatish Balay v = a->a; 898f44d3a6dSSatish Balay ii = a->i; 899f44d3a6dSSatish Balay 900f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 901f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 902f44d3a6dSSatish Balay sum = y[i]; 903f44d3a6dSSatish Balay while (n--) sum += *v++ * x[*idx++]; 904f44d3a6dSSatish Balay z[i] = sum; 905f44d3a6dSSatish Balay } 906c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 907c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 908c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 909f44d3a6dSSatish Balay PLogFlops(2*a->nz); 910f44d3a6dSSatish Balay return 0; 911f44d3a6dSSatish Balay } 912f44d3a6dSSatish Balay 913f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 914f44d3a6dSSatish Balay { 915f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 916f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 917f44d3a6dSSatish Balay register Scalar x1,x2; 918f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 919c16cb8f2SBarry Smith int j,n; 920f44d3a6dSSatish Balay 921c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 922c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 923c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 924f44d3a6dSSatish Balay 925f44d3a6dSSatish Balay idx = a->j; 926f44d3a6dSSatish Balay v = a->a; 927f44d3a6dSSatish Balay ii = a->i; 928f44d3a6dSSatish Balay 929f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 930f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 931f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; 932f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 933f44d3a6dSSatish Balay xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 934f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[2]*x2; 935f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[3]*x2; 936f44d3a6dSSatish Balay v += 4; 937f44d3a6dSSatish Balay } 938f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; 939f44d3a6dSSatish Balay z += 2; y += 2; 940f44d3a6dSSatish Balay } 941c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 942c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 943c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 944f44d3a6dSSatish Balay PLogFlops(4*a->nz); 945f44d3a6dSSatish Balay return 0; 946f44d3a6dSSatish Balay } 947f44d3a6dSSatish Balay 948f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 949f44d3a6dSSatish Balay { 950f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 951f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 952c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 953f44d3a6dSSatish Balay 954c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 955c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 956c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 957f44d3a6dSSatish Balay 958f44d3a6dSSatish Balay idx = a->j; 959f44d3a6dSSatish Balay v = a->a; 960f44d3a6dSSatish Balay ii = a->i; 961f44d3a6dSSatish Balay 962f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 963f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 964f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 965f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 966f44d3a6dSSatish Balay xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 967f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 968f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 969f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 970f44d3a6dSSatish Balay v += 9; 971f44d3a6dSSatish Balay } 972f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; 973f44d3a6dSSatish Balay z += 3; y += 3; 974f44d3a6dSSatish Balay } 975c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 976c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 977c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 978f44d3a6dSSatish Balay PLogFlops(18*a->nz); 979f44d3a6dSSatish Balay return 0; 980f44d3a6dSSatish Balay } 981f44d3a6dSSatish Balay 982f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 983f44d3a6dSSatish Balay { 984f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 985f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 986f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4; 987f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii; 988c16cb8f2SBarry Smith int j,n; 989f44d3a6dSSatish Balay 990c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 991c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 992c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 993f44d3a6dSSatish Balay 994f44d3a6dSSatish Balay idx = a->j; 995f44d3a6dSSatish Balay v = a->a; 996f44d3a6dSSatish Balay ii = a->i; 997f44d3a6dSSatish Balay 998f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 999f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1000f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1001f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1002f44d3a6dSSatish Balay xb = x + 4*(*idx++); 1003f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1004f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1005f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1006f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1007f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1008f44d3a6dSSatish Balay v += 16; 1009f44d3a6dSSatish Balay } 1010f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1011f44d3a6dSSatish Balay z += 4; y += 4; 1012f44d3a6dSSatish Balay } 1013c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1014c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1015c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1016f44d3a6dSSatish Balay PLogFlops(32*a->nz); 1017f44d3a6dSSatish Balay return 0; 1018f44d3a6dSSatish Balay } 1019f44d3a6dSSatish Balay 1020f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1021f44d3a6dSSatish Balay { 1022f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1023f44d3a6dSSatish Balay register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1024f44d3a6dSSatish Balay register Scalar x1,x2,x3,x4,x5; 1025c16cb8f2SBarry Smith int mbs=a->mbs,i,*idx,*ii,j,n; 1026f44d3a6dSSatish Balay 1027c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1028c16cb8f2SBarry Smith VecGetArray_Fast(yy,y); 1029c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1030f44d3a6dSSatish Balay 1031f44d3a6dSSatish Balay idx = a->j; 1032f44d3a6dSSatish Balay v = a->a; 1033f44d3a6dSSatish Balay ii = a->i; 1034f44d3a6dSSatish Balay 1035f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1036f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1037f44d3a6dSSatish Balay sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1038f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1039f44d3a6dSSatish Balay xb = x + 5*(*idx++); 1040f44d3a6dSSatish Balay x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1041f44d3a6dSSatish Balay sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1042f44d3a6dSSatish Balay sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1043f44d3a6dSSatish Balay sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1044f44d3a6dSSatish Balay sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1045f44d3a6dSSatish Balay sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1046f44d3a6dSSatish Balay v += 25; 1047f44d3a6dSSatish Balay } 1048f44d3a6dSSatish Balay z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1049f44d3a6dSSatish Balay z += 5; y += 5; 1050f44d3a6dSSatish Balay } 1051c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1052c16cb8f2SBarry Smith VecRestoreArray_Fast(yy,y); 1053c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1054f44d3a6dSSatish Balay PLogFlops(50*a->nz); 1055f44d3a6dSSatish Balay return 0; 1056f44d3a6dSSatish Balay } 1057f44d3a6dSSatish Balay 1058f44d3a6dSSatish Balay static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1059f44d3a6dSSatish Balay { 1060f44d3a6dSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1061f44d3a6dSSatish Balay register Scalar *x,*z,*v,*xb; 1062f44d3a6dSSatish Balay int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1063f44d3a6dSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1064f44d3a6dSSatish Balay 1065f44d3a6dSSatish Balay if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1066f44d3a6dSSatish Balay 1067c16cb8f2SBarry Smith VecGetArray_Fast(xx,x); 1068c16cb8f2SBarry Smith VecGetArray_Fast(zz,z); 1069f44d3a6dSSatish Balay 1070f44d3a6dSSatish Balay idx = a->j; 1071f44d3a6dSSatish Balay v = a->a; 1072f44d3a6dSSatish Balay ii = a->i; 1073f44d3a6dSSatish Balay 1074f44d3a6dSSatish Balay 1075f44d3a6dSSatish Balay if (!a->mult_work) { 1076f44d3a6dSSatish Balay k = PetscMax(a->m,a->n); 1077f44d3a6dSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1078f44d3a6dSSatish Balay } 1079f44d3a6dSSatish Balay work = a->mult_work; 1080f44d3a6dSSatish Balay for ( i=0; i<mbs; i++ ) { 1081f44d3a6dSSatish Balay n = ii[1] - ii[0]; ii++; 1082f44d3a6dSSatish Balay ncols = n*bs; 1083f44d3a6dSSatish Balay workt = work; 1084f44d3a6dSSatish Balay for ( j=0; j<n; j++ ) { 1085f44d3a6dSSatish Balay xb = x + bs*(*idx++); 1086f44d3a6dSSatish Balay for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1087f44d3a6dSSatish Balay workt += bs; 1088f44d3a6dSSatish Balay } 1089f44d3a6dSSatish Balay LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1090f44d3a6dSSatish Balay v += n*bs2; 1091f44d3a6dSSatish Balay z += bs; 1092f44d3a6dSSatish Balay } 1093c16cb8f2SBarry Smith VecRestoreArray_Fast(xx,x); 1094c16cb8f2SBarry Smith VecRestoreArray_Fast(zz,z); 1095f44d3a6dSSatish Balay PLogFlops(2*a->nz*bs2 ); 1096f44d3a6dSSatish Balay return 0; 1097f44d3a6dSSatish Balay } 1098f44d3a6dSSatish Balay 10991a6a6d98SBarry Smith static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1100bb42667fSSatish Balay { 1101bb42667fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 11021a6a6d98SBarry Smith Scalar *xg,*zg,*zb; 1103bb42667fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1104bb1453f0SSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 11059df24120SSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1106119a934aSSatish Balay 1107119a934aSSatish Balay 110890f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 110990f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1110bb1453f0SSatish Balay PetscMemzero(z,N*sizeof(Scalar)); 1111bb42667fSSatish Balay 1112119a934aSSatish Balay idx = a->j; 1113119a934aSSatish Balay v = a->a; 1114119a934aSSatish Balay ii = a->i; 1115119a934aSSatish Balay 1116119a934aSSatish Balay switch (bs) { 1117119a934aSSatish Balay case 1: 1118119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1119119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1120119a934aSSatish Balay xb = x + i; x1 = xb[0]; 1121119a934aSSatish Balay ib = idx + ai[i]; 1122119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1123bb1453f0SSatish Balay rval = ib[j]; 1124bb1453f0SSatish Balay z[rval] += *v++ * x1; 1125119a934aSSatish Balay } 1126119a934aSSatish Balay } 1127119a934aSSatish Balay break; 1128119a934aSSatish Balay case 2: 1129119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1130119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1131119a934aSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1132119a934aSSatish Balay ib = idx + ai[i]; 1133119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1134119a934aSSatish Balay rval = ib[j]*2; 1135bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1136bb1453f0SSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1137119a934aSSatish Balay v += 4; 1138119a934aSSatish Balay } 1139119a934aSSatish Balay } 1140119a934aSSatish Balay break; 1141119a934aSSatish Balay case 3: 1142119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1143119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1144119a934aSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1145119a934aSSatish Balay ib = idx + ai[i]; 1146119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1147119a934aSSatish Balay rval = ib[j]*3; 1148bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1149bb1453f0SSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1150bb1453f0SSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1151119a934aSSatish Balay v += 9; 1152119a934aSSatish Balay } 1153119a934aSSatish Balay } 1154119a934aSSatish Balay break; 1155119a934aSSatish Balay case 4: 1156119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1157119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1158119a934aSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1159119a934aSSatish Balay ib = idx + ai[i]; 1160119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1161119a934aSSatish Balay rval = ib[j]*4; 1162bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1163bb1453f0SSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1164bb1453f0SSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1165bb1453f0SSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1166119a934aSSatish Balay v += 16; 1167119a934aSSatish Balay } 1168119a934aSSatish Balay } 1169119a934aSSatish Balay break; 1170119a934aSSatish Balay case 5: 1171119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1172119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1173119a934aSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1174119a934aSSatish Balay x4 = xb[3]; x5 = xb[4]; 1175119a934aSSatish Balay ib = idx + ai[i]; 1176119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1177119a934aSSatish Balay rval = ib[j]*5; 1178bb1453f0SSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1179bb1453f0SSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1180bb1453f0SSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1181bb1453f0SSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1182bb1453f0SSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1183119a934aSSatish Balay v += 25; 1184119a934aSSatish Balay } 1185119a934aSSatish Balay } 1186119a934aSSatish Balay break; 1187119a934aSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1188119a934aSSatish Balay default: { 1189119a934aSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1190119a934aSSatish Balay if (!a->mult_work) { 11913b547af2SSatish Balay k = PetscMax(a->m,a->n); 1192bb42667fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1193119a934aSSatish Balay CHKPTRQ(a->mult_work); 1194119a934aSSatish Balay } 1195119a934aSSatish Balay work = a->mult_work; 1196119a934aSSatish Balay for ( i=0; i<mbs; i++ ) { 1197119a934aSSatish Balay n = ii[1] - ii[0]; ii++; 1198119a934aSSatish Balay ncols = n*bs; 1199119a934aSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1200119a934aSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1201119a934aSSatish Balay v += n*bs2; 1202119a934aSSatish Balay x += bs; 1203119a934aSSatish Balay workt = work; 1204119a934aSSatish Balay for ( j=0; j<n; j++ ) { 1205119a934aSSatish Balay zb = z + bs*(*idx++); 1206bb1453f0SSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1207119a934aSSatish Balay workt += bs; 1208119a934aSSatish Balay } 1209119a934aSSatish Balay } 1210119a934aSSatish Balay } 1211119a934aSSatish Balay } 12120419e6cdSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 12130419e6cdSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1214faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2 - a->n); 1215faf6580fSSatish Balay return 0; 1216faf6580fSSatish Balay } 1217faf6580fSSatish Balay static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1218faf6580fSSatish Balay { 1219faf6580fSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1220faf6580fSSatish Balay Scalar *xg,*zg,*zb; 1221faf6580fSSatish Balay register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1222faf6580fSSatish Balay int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1223faf6580fSSatish Balay int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1224faf6580fSSatish Balay 1225faf6580fSSatish Balay 1226faf6580fSSatish Balay 122790f02eecSBarry Smith VecGetArray_Fast(xx,xg); x = xg; 122890f02eecSBarry Smith VecGetArray_Fast(zz,zg); z = zg; 1229faf6580fSSatish Balay 1230648c1d95SSatish Balay if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1231648c1d95SSatish Balay else PetscMemzero(z,N*sizeof(Scalar)); 1232648c1d95SSatish Balay 1233faf6580fSSatish Balay 1234faf6580fSSatish Balay idx = a->j; 1235faf6580fSSatish Balay v = a->a; 1236faf6580fSSatish Balay ii = a->i; 1237faf6580fSSatish Balay 1238faf6580fSSatish Balay switch (bs) { 1239faf6580fSSatish Balay case 1: 1240faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1241faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1242faf6580fSSatish Balay xb = x + i; x1 = xb[0]; 1243faf6580fSSatish Balay ib = idx + ai[i]; 1244faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1245faf6580fSSatish Balay rval = ib[j]; 1246faf6580fSSatish Balay z[rval] += *v++ * x1; 1247faf6580fSSatish Balay } 1248faf6580fSSatish Balay } 1249faf6580fSSatish Balay break; 1250faf6580fSSatish Balay case 2: 1251faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1252faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1253faf6580fSSatish Balay xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1254faf6580fSSatish Balay ib = idx + ai[i]; 1255faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1256faf6580fSSatish Balay rval = ib[j]*2; 1257faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2; 1258faf6580fSSatish Balay z[rval++] += v[2]*x1 + v[3]*x2; 1259faf6580fSSatish Balay v += 4; 1260faf6580fSSatish Balay } 1261faf6580fSSatish Balay } 1262faf6580fSSatish Balay break; 1263faf6580fSSatish Balay case 3: 1264faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1265faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1266faf6580fSSatish Balay xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1267faf6580fSSatish Balay ib = idx + ai[i]; 1268faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1269faf6580fSSatish Balay rval = ib[j]*3; 1270faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1271faf6580fSSatish Balay z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1272faf6580fSSatish Balay z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1273faf6580fSSatish Balay v += 9; 1274faf6580fSSatish Balay } 1275faf6580fSSatish Balay } 1276faf6580fSSatish Balay break; 1277faf6580fSSatish Balay case 4: 1278faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1279faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1280faf6580fSSatish Balay xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1281faf6580fSSatish Balay ib = idx + ai[i]; 1282faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1283faf6580fSSatish Balay rval = ib[j]*4; 1284faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1285faf6580fSSatish Balay z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1286faf6580fSSatish Balay z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1287faf6580fSSatish Balay z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1288faf6580fSSatish Balay v += 16; 1289faf6580fSSatish Balay } 1290faf6580fSSatish Balay } 1291faf6580fSSatish Balay break; 1292faf6580fSSatish Balay case 5: 1293faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1294faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1295faf6580fSSatish Balay xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1296faf6580fSSatish Balay x4 = xb[3]; x5 = xb[4]; 1297faf6580fSSatish Balay ib = idx + ai[i]; 1298faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1299faf6580fSSatish Balay rval = ib[j]*5; 1300faf6580fSSatish Balay z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1301faf6580fSSatish Balay z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1302faf6580fSSatish Balay z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1303faf6580fSSatish Balay z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1304faf6580fSSatish Balay z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1305faf6580fSSatish Balay v += 25; 1306faf6580fSSatish Balay } 1307faf6580fSSatish Balay } 1308faf6580fSSatish Balay break; 1309faf6580fSSatish Balay /* block sizes larger then 5 by 5 are handled by BLAS */ 1310faf6580fSSatish Balay default: { 1311faf6580fSSatish Balay int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1312faf6580fSSatish Balay if (!a->mult_work) { 1313faf6580fSSatish Balay k = PetscMax(a->m,a->n); 1314faf6580fSSatish Balay a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1315faf6580fSSatish Balay CHKPTRQ(a->mult_work); 1316faf6580fSSatish Balay } 1317faf6580fSSatish Balay work = a->mult_work; 1318faf6580fSSatish Balay for ( i=0; i<mbs; i++ ) { 1319faf6580fSSatish Balay n = ii[1] - ii[0]; ii++; 1320faf6580fSSatish Balay ncols = n*bs; 1321faf6580fSSatish Balay PetscMemzero(work,ncols*sizeof(Scalar)); 1322faf6580fSSatish Balay LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1323faf6580fSSatish Balay v += n*bs2; 1324faf6580fSSatish Balay x += bs; 1325faf6580fSSatish Balay workt = work; 1326faf6580fSSatish Balay for ( j=0; j<n; j++ ) { 1327faf6580fSSatish Balay zb = z + bs*(*idx++); 1328faf6580fSSatish Balay for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1329faf6580fSSatish Balay workt += bs; 1330faf6580fSSatish Balay } 1331faf6580fSSatish Balay } 1332faf6580fSSatish Balay } 1333faf6580fSSatish Balay } 1334faf6580fSSatish Balay ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1335faf6580fSSatish Balay ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1336faf6580fSSatish Balay PLogFlops(2*a->nz*a->bs2); 13372593348eSBarry Smith return 0; 13382593348eSBarry Smith } 13392593348eSBarry Smith 13404e220ebcSLois Curfman McInnes static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 13412593348eSBarry Smith { 1342b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 13434e220ebcSLois Curfman McInnes 13444e220ebcSLois Curfman McInnes info->rows_global = (double)a->m; 13454e220ebcSLois Curfman McInnes info->columns_global = (double)a->n; 13464e220ebcSLois Curfman McInnes info->rows_local = (double)a->m; 13474e220ebcSLois Curfman McInnes info->columns_local = (double)a->n; 13484e220ebcSLois Curfman McInnes info->block_size = a->bs2; 13494e220ebcSLois Curfman McInnes info->nz_allocated = a->maxnz; 13504e220ebcSLois Curfman McInnes info->nz_used = a->bs2*a->nz; 13514e220ebcSLois Curfman McInnes info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 13524e220ebcSLois Curfman McInnes /* if (info->nz_unneeded != A->info.nz_unneeded) 13534e220ebcSLois Curfman McInnes printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 13544e220ebcSLois Curfman McInnes info->assemblies = A->num_ass; 13554e220ebcSLois Curfman McInnes info->mallocs = a->reallocs; 13564e220ebcSLois Curfman McInnes info->memory = A->mem; 13574e220ebcSLois Curfman McInnes if (A->factor) { 13584e220ebcSLois Curfman McInnes info->fill_ratio_given = A->info.fill_ratio_given; 13594e220ebcSLois Curfman McInnes info->fill_ratio_needed = A->info.fill_ratio_needed; 13604e220ebcSLois Curfman McInnes info->factor_mallocs = A->info.factor_mallocs; 13614e220ebcSLois Curfman McInnes } else { 13624e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; 13634e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 13644e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 13654e220ebcSLois Curfman McInnes } 13662593348eSBarry Smith return 0; 13672593348eSBarry Smith } 13682593348eSBarry Smith 136991d316f6SSatish Balay static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 137091d316f6SSatish Balay { 137191d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 137291d316f6SSatish Balay 137391d316f6SSatish Balay if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 137491d316f6SSatish Balay 137591d316f6SSatish Balay /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 137691d316f6SSatish Balay if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1377a7c10996SSatish Balay (a->nz != b->nz)) { 137891d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 137991d316f6SSatish Balay } 138091d316f6SSatish Balay 138191d316f6SSatish Balay /* if the a->i are the same */ 138291d316f6SSatish Balay if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 138391d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 138491d316f6SSatish Balay } 138591d316f6SSatish Balay 138691d316f6SSatish Balay /* if a->j are the same */ 138791d316f6SSatish Balay if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 138891d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 138991d316f6SSatish Balay } 139091d316f6SSatish Balay 139191d316f6SSatish Balay /* if a->a are the same */ 139291d316f6SSatish Balay if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 139391d316f6SSatish Balay *flg = PETSC_FALSE; return 0; 139491d316f6SSatish Balay } 139591d316f6SSatish Balay *flg = PETSC_TRUE; 139691d316f6SSatish Balay return 0; 139791d316f6SSatish Balay 139891d316f6SSatish Balay } 139991d316f6SSatish Balay 140091d316f6SSatish Balay static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 140191d316f6SSatish Balay { 140291d316f6SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14037e67e3f9SSatish Balay int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 140417e48fc4SSatish Balay Scalar *x, zero = 0.0,*aa,*aa_j; 140517e48fc4SSatish Balay 140617e48fc4SSatish Balay bs = a->bs; 140717e48fc4SSatish Balay aa = a->a; 140817e48fc4SSatish Balay ai = a->i; 140917e48fc4SSatish Balay aj = a->j; 141017e48fc4SSatish Balay ambs = a->mbs; 14119df24120SSatish Balay bs2 = a->bs2; 141291d316f6SSatish Balay 141391d316f6SSatish Balay VecSet(&zero,v); 141490f02eecSBarry Smith VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 14159867e207SSatish Balay if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 141617e48fc4SSatish Balay for ( i=0; i<ambs; i++ ) { 141717e48fc4SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 141817e48fc4SSatish Balay if (aj[j] == i) { 141917e48fc4SSatish Balay row = i*bs; 14207e67e3f9SSatish Balay aa_j = aa+j*bs2; 14217e67e3f9SSatish Balay for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 142291d316f6SSatish Balay break; 142391d316f6SSatish Balay } 142491d316f6SSatish Balay } 142591d316f6SSatish Balay } 142691d316f6SSatish Balay return 0; 142791d316f6SSatish Balay } 142891d316f6SSatish Balay 14299867e207SSatish Balay static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 14309867e207SSatish Balay { 14319867e207SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 14329867e207SSatish Balay Scalar *l,*r,x,*v,*aa,*li,*ri; 14337e67e3f9SSatish Balay int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 14349867e207SSatish Balay 14359867e207SSatish Balay ai = a->i; 14369867e207SSatish Balay aj = a->j; 14379867e207SSatish Balay aa = a->a; 14389867e207SSatish Balay m = a->m; 14399867e207SSatish Balay n = a->n; 14409867e207SSatish Balay bs = a->bs; 14419867e207SSatish Balay mbs = a->mbs; 14429df24120SSatish Balay bs2 = a->bs2; 14439867e207SSatish Balay if (ll) { 144490f02eecSBarry Smith VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 14459867e207SSatish Balay if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 14469867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 14479867e207SSatish Balay M = ai[i+1] - ai[i]; 14489867e207SSatish Balay li = l + i*bs; 14497e67e3f9SSatish Balay v = aa + bs2*ai[i]; 14509867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 14517e67e3f9SSatish Balay for ( k=0; k<bs2; k++ ) { 14529867e207SSatish Balay (*v++) *= li[k%bs]; 14539867e207SSatish Balay } 14549867e207SSatish Balay } 14559867e207SSatish Balay } 14569867e207SSatish Balay } 14579867e207SSatish Balay 14589867e207SSatish Balay if (rr) { 145990f02eecSBarry Smith VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 14609867e207SSatish Balay if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 14619867e207SSatish Balay for ( i=0; i<mbs; i++ ) { /* for each block row */ 14629867e207SSatish Balay M = ai[i+1] - ai[i]; 14637e67e3f9SSatish Balay v = aa + bs2*ai[i]; 14649867e207SSatish Balay for ( j=0; j<M; j++ ) { /* for each block */ 14659867e207SSatish Balay ri = r + bs*aj[ai[i]+j]; 14669867e207SSatish Balay for ( k=0; k<bs; k++ ) { 14679867e207SSatish Balay x = ri[k]; 14689867e207SSatish Balay for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 14699867e207SSatish Balay } 14709867e207SSatish Balay } 14719867e207SSatish Balay } 14729867e207SSatish Balay } 14739867e207SSatish Balay return 0; 14749867e207SSatish Balay } 14759867e207SSatish Balay 14769867e207SSatish Balay 1477b6490206SBarry Smith extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1478b6490206SBarry Smith extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 14792a38ea04SSatish Balay extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1480736121d4SSatish Balay extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1481736121d4SSatish Balay extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 14821a6a6d98SBarry Smith 14831a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 14841a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 14851a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 14861a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 14871a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 14881a6a6d98SBarry Smith extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 14891a6a6d98SBarry Smith 14907fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 14917fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 14927fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 14937fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 14947fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 14957fc0212eSBarry Smith extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 14962593348eSBarry Smith 1497b6490206SBarry Smith static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 14982593348eSBarry Smith { 1499b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 15002593348eSBarry Smith Scalar *v = a->a; 15012593348eSBarry Smith double sum = 0.0; 15029df24120SSatish Balay int i,nz=a->nz,bs2=a->bs2; 15032593348eSBarry Smith 15042593348eSBarry Smith if (type == NORM_FROBENIUS) { 15050419e6cdSSatish Balay for (i=0; i< bs2*nz; i++ ) { 15062593348eSBarry Smith #if defined(PETSC_COMPLEX) 15072593348eSBarry Smith sum += real(conj(*v)*(*v)); v++; 15082593348eSBarry Smith #else 15092593348eSBarry Smith sum += (*v)*(*v); v++; 15102593348eSBarry Smith #endif 15112593348eSBarry Smith } 15122593348eSBarry Smith *norm = sqrt(sum); 15132593348eSBarry Smith } 15142593348eSBarry Smith else { 1515b0267e0aSLois Curfman McInnes SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet"); 15162593348eSBarry Smith } 15172593348eSBarry Smith return 0; 15182593348eSBarry Smith } 15192593348eSBarry Smith 15202593348eSBarry Smith /* 15212593348eSBarry Smith note: This can only work for identity for row and col. It would 15222593348eSBarry Smith be good to check this and otherwise generate an error. 15232593348eSBarry Smith */ 1524b6490206SBarry Smith static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 15252593348eSBarry Smith { 1526b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 15272593348eSBarry Smith Mat outA; 1528de6a44a3SBarry Smith int ierr; 15292593348eSBarry Smith 1530b6490206SBarry Smith if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 15312593348eSBarry Smith 15322593348eSBarry Smith outA = inA; 15332593348eSBarry Smith inA->factor = FACTOR_LU; 15342593348eSBarry Smith a->row = row; 15352593348eSBarry Smith a->col = col; 15362593348eSBarry Smith 1537de6a44a3SBarry Smith a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 15382593348eSBarry Smith 15392593348eSBarry Smith if (!a->diag) { 1540b6490206SBarry Smith ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 15412593348eSBarry Smith } 15427fc0212eSBarry Smith ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 15432593348eSBarry Smith return 0; 15442593348eSBarry Smith } 15452593348eSBarry Smith 1546b6490206SBarry Smith static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 15472593348eSBarry Smith { 1548b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 15499df24120SSatish Balay int one = 1, totalnz = a->bs2*a->nz; 1550b6490206SBarry Smith BLscal_( &totalnz, alpha, a->a, &one ); 1551b6490206SBarry Smith PLogFlops(totalnz); 15522593348eSBarry Smith return 0; 15532593348eSBarry Smith } 15542593348eSBarry Smith 15557e67e3f9SSatish Balay static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 15567e67e3f9SSatish Balay { 15577e67e3f9SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 15587e67e3f9SSatish Balay int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1559a7c10996SSatish Balay int *ai = a->i, *ailen = a->ilen; 15609df24120SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 15617e67e3f9SSatish Balay Scalar *ap, *aa = a->a, zero = 0.0; 15627e67e3f9SSatish Balay 15637e67e3f9SSatish Balay for ( k=0; k<m; k++ ) { /* loop over rows */ 15647e67e3f9SSatish Balay row = im[k]; brow = row/bs; 15657e67e3f9SSatish Balay if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 15667e67e3f9SSatish Balay if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1567a7c10996SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 15687e67e3f9SSatish Balay nrow = ailen[brow]; 15697e67e3f9SSatish Balay for ( l=0; l<n; l++ ) { /* loop over columns */ 15707e67e3f9SSatish Balay if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 15717e67e3f9SSatish Balay if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1572a7c10996SSatish Balay col = in[l] ; 15737e67e3f9SSatish Balay bcol = col/bs; 15747e67e3f9SSatish Balay cidx = col%bs; 15757e67e3f9SSatish Balay ridx = row%bs; 15767e67e3f9SSatish Balay high = nrow; 15777e67e3f9SSatish Balay low = 0; /* assume unsorted */ 15787e67e3f9SSatish Balay while (high-low > 5) { 15797e67e3f9SSatish Balay t = (low+high)/2; 15807e67e3f9SSatish Balay if (rp[t] > bcol) high = t; 15817e67e3f9SSatish Balay else low = t; 15827e67e3f9SSatish Balay } 15837e67e3f9SSatish Balay for ( i=low; i<high; i++ ) { 15847e67e3f9SSatish Balay if (rp[i] > bcol) break; 15857e67e3f9SSatish Balay if (rp[i] == bcol) { 15867e67e3f9SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 15877e67e3f9SSatish Balay goto finished; 15887e67e3f9SSatish Balay } 15897e67e3f9SSatish Balay } 15907e67e3f9SSatish Balay *v++ = zero; 15917e67e3f9SSatish Balay finished:; 15927e67e3f9SSatish Balay } 15937e67e3f9SSatish Balay } 15947e67e3f9SSatish Balay return 0; 15957e67e3f9SSatish Balay } 15967e67e3f9SSatish Balay 15975a838052SSatish Balay static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 15985a838052SSatish Balay { 15995a838052SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 16005a838052SSatish Balay *bs = baij->bs; 16015a838052SSatish Balay return 0; 16025a838052SSatish Balay } 16035a838052SSatish Balay 1604d9b7c43dSSatish Balay /* idx should be of length atlease bs */ 1605d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1606d9b7c43dSSatish Balay { 1607d9b7c43dSSatish Balay int i,row; 1608d9b7c43dSSatish Balay row = idx[0]; 1609d9b7c43dSSatish Balay if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1610d9b7c43dSSatish Balay 1611d9b7c43dSSatish Balay for ( i=1; i<bs; i++ ) { 1612d9b7c43dSSatish Balay if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1613d9b7c43dSSatish Balay } 1614d9b7c43dSSatish Balay *flg = PETSC_TRUE; 1615d9b7c43dSSatish Balay return 0; 1616d9b7c43dSSatish Balay } 1617d9b7c43dSSatish Balay 1618d9b7c43dSSatish Balay static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1619d9b7c43dSSatish Balay { 1620d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1621d9b7c43dSSatish Balay IS is_local; 1622d9b7c43dSSatish Balay int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1623d9b7c43dSSatish Balay PetscTruth flg; 1624d9b7c43dSSatish Balay Scalar zero = 0.0,*aa; 1625d9b7c43dSSatish Balay 1626d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1627d9b7c43dSSatish Balay ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1628d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1629537820f0SBarry Smith ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1630d9b7c43dSSatish Balay ierr = ISSort(is_local); CHKERRQ(ierr); 1631d9b7c43dSSatish Balay ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1632d9b7c43dSSatish Balay 1633d9b7c43dSSatish Balay i = 0; 1634d9b7c43dSSatish Balay while (i < is_n) { 1635d9b7c43dSSatish Balay if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1636d9b7c43dSSatish Balay flg = PETSC_FALSE; 1637d9b7c43dSSatish Balay if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1638d9b7c43dSSatish Balay if (flg) { /* There exists a block of rows to be Zerowed */ 1639d9b7c43dSSatish Balay baij->ilen[rows[i]/bs] = 0; 1640d9b7c43dSSatish Balay i += bs; 1641d9b7c43dSSatish Balay } else { /* Zero out only the requested row */ 1642d9b7c43dSSatish Balay count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1643d9b7c43dSSatish Balay aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1644d9b7c43dSSatish Balay for ( j=0; j<count; j++ ) { 1645d9b7c43dSSatish Balay aa[0] = zero; 1646d9b7c43dSSatish Balay aa+=bs; 1647d9b7c43dSSatish Balay } 1648d9b7c43dSSatish Balay i++; 1649d9b7c43dSSatish Balay } 1650d9b7c43dSSatish Balay } 1651d9b7c43dSSatish Balay if (diag) { 1652d9b7c43dSSatish Balay for ( j=0; j<is_n; j++ ) { 1653d9b7c43dSSatish Balay ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1654d9b7c43dSSatish Balay } 1655d9b7c43dSSatish Balay } 1656d9b7c43dSSatish Balay ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1657d9b7c43dSSatish Balay ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1658d9b7c43dSSatish Balay ierr = ISDestroy(is_local); CHKERRQ(ierr); 16599a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1660d9b7c43dSSatish Balay 1661d9b7c43dSSatish Balay return 0; 1662d9b7c43dSSatish Balay } 1663ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1664ba4ca20aSSatish Balay { 1665ba4ca20aSSatish Balay static int called = 0; 1666ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1667ba4ca20aSSatish Balay 1668ba4ca20aSSatish Balay if (called) return 0; else called = 1; 1669ba4ca20aSSatish Balay PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1670ba4ca20aSSatish Balay PetscPrintf(comm," -mat_block_size <block_size>\n"); 1671ba4ca20aSSatish Balay return 0; 1672ba4ca20aSSatish Balay } 1673d9b7c43dSSatish Balay 16742593348eSBarry Smith /* -------------------------------------------------------------------*/ 1675cd0e1443SSatish Balay static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 16769867e207SSatish Balay MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1677f44d3a6dSSatish Balay MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1678faf6580fSSatish Balay MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 16791a6a6d98SBarry Smith MatSolve_SeqBAIJ_N,0, 1680b6490206SBarry Smith 0,0, 1681de6a44a3SBarry Smith MatLUFactor_SeqBAIJ,0, 1682b6490206SBarry Smith 0, 1683f2501298SSatish Balay MatTranspose_SeqBAIJ, 168417e48fc4SSatish Balay MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 16859867e207SSatish Balay MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1686584200bdSSatish Balay 0,MatAssemblyEnd_SeqBAIJ, 1687b6490206SBarry Smith 0, 1688d9b7c43dSSatish Balay MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 16897fc0212eSBarry Smith MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1690b6490206SBarry Smith MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1691de6a44a3SBarry Smith MatILUFactorSymbolic_SeqBAIJ,0, 1692b6490206SBarry Smith 0,0,/* MatConvert_SeqBAIJ */ 0, 1693b6490206SBarry Smith MatConvertSameType_SeqBAIJ,0,0, 1694b6490206SBarry Smith MatILUFactor_SeqBAIJ,0,0, 1695af015674SSatish Balay MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 16967e67e3f9SSatish Balay MatGetValues_SeqBAIJ,0, 1697ba4ca20aSSatish Balay MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 16983b2fbd54SBarry Smith 0,0,0,MatGetBlockSize_SeqBAIJ, 16993b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 17003b2fbd54SBarry Smith MatRestoreRowIJ_SeqBAIJ}; 17012593348eSBarry Smith 17022593348eSBarry Smith /*@C 170344cd7ae7SLois Curfman McInnes MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 170444cd7ae7SLois Curfman McInnes compressed row) format. For good matrix assembly performance the 170544cd7ae7SLois Curfman McInnes user should preallocate the matrix storage by setting the parameter nz 17062bd5e0b2SLois Curfman McInnes (or the array nzz). By setting these parameters accurately, performance 17072bd5e0b2SLois Curfman McInnes during matrix assembly can be increased by more than a factor of 50. 17082593348eSBarry Smith 17092593348eSBarry Smith Input Parameters: 17102593348eSBarry Smith . comm - MPI communicator, set to MPI_COMM_SELF 1711b6490206SBarry Smith . bs - size of block 17122593348eSBarry Smith . m - number of rows 17132593348eSBarry Smith . n - number of columns 1714b6490206SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 17152bd5e0b2SLois Curfman McInnes . nzz - array containing the number of block nonzeros in the various block rows 17162bd5e0b2SLois Curfman McInnes (possibly different for each block row) or PETSC_NULL 17172593348eSBarry Smith 17182593348eSBarry Smith Output Parameter: 17192593348eSBarry Smith . A - the matrix 17202593348eSBarry Smith 17212593348eSBarry Smith Notes: 172244cd7ae7SLois Curfman McInnes The block AIJ format is fully compatible with standard Fortran 77 17232593348eSBarry Smith storage. That is, the stored row and column indices can begin at 172444cd7ae7SLois Curfman McInnes either one (as in Fortran) or zero. See the users' manual for details. 17252593348eSBarry Smith 17262593348eSBarry Smith Specify the preallocated storage with either nz or nnz (not both). 17272593348eSBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 17282593348eSBarry Smith allocation. For additional details, see the users manual chapter on 17296da5968aSLois Curfman McInnes matrices. 17302593348eSBarry Smith 173144cd7ae7SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 17322593348eSBarry Smith @*/ 1733b6490206SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 17342593348eSBarry Smith { 17352593348eSBarry Smith Mat B; 1736b6490206SBarry Smith Mat_SeqBAIJ *b; 17373b2fbd54SBarry Smith int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 17383b2fbd54SBarry Smith 17393b2fbd54SBarry Smith MPI_Comm_size(comm,&size); 17403b2fbd54SBarry Smith if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1"); 1741b6490206SBarry Smith 1742f2501298SSatish Balay if (mbs*bs!=m || nbs*bs!=n) 1743f2501298SSatish Balay SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 17442593348eSBarry Smith 17452593348eSBarry Smith *A = 0; 1746b6490206SBarry Smith PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 17472593348eSBarry Smith PLogObjectCreate(B); 1748b6490206SBarry Smith B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 174944cd7ae7SLois Curfman McInnes PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 17502593348eSBarry Smith PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 17511a6a6d98SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 17521a6a6d98SBarry Smith if (!flg) { 17537fc0212eSBarry Smith switch (bs) { 17547fc0212eSBarry Smith case 1: 17557fc0212eSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17561a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_1; 175739b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_1; 1758f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_1; 17597fc0212eSBarry Smith break; 17604eeb42bcSBarry Smith case 2: 17614eeb42bcSBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 17621a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_2; 176339b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_2; 1764f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_2; 17656d84be18SBarry Smith break; 1766f327dd97SBarry Smith case 3: 1767f327dd97SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 17681a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_3; 176939b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_3; 1770f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_3; 17714eeb42bcSBarry Smith break; 17726d84be18SBarry Smith case 4: 17736d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 17741a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_4; 177539b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_4; 1776f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_4; 17776d84be18SBarry Smith break; 17786d84be18SBarry Smith case 5: 17796d84be18SBarry Smith B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 17801a6a6d98SBarry Smith B->ops.solve = MatSolve_SeqBAIJ_5; 178139b95ed1SBarry Smith B->ops.mult = MatMult_SeqBAIJ_5; 1782f44d3a6dSSatish Balay B->ops.multadd = MatMultAdd_SeqBAIJ_5; 17836d84be18SBarry Smith break; 17847fc0212eSBarry Smith } 17851a6a6d98SBarry Smith } 1786b6490206SBarry Smith B->destroy = MatDestroy_SeqBAIJ; 1787b6490206SBarry Smith B->view = MatView_SeqBAIJ; 17882593348eSBarry Smith B->factor = 0; 17892593348eSBarry Smith B->lupivotthreshold = 1.0; 179090f02eecSBarry Smith B->mapping = 0; 17912593348eSBarry Smith b->row = 0; 17922593348eSBarry Smith b->col = 0; 17932593348eSBarry Smith b->reallocs = 0; 17942593348eSBarry Smith 179544cd7ae7SLois Curfman McInnes b->m = m; B->m = m; B->M = m; 179644cd7ae7SLois Curfman McInnes b->n = n; B->n = n; B->N = n; 1797b6490206SBarry Smith b->mbs = mbs; 1798f2501298SSatish Balay b->nbs = nbs; 1799b6490206SBarry Smith b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 18002593348eSBarry Smith if (nnz == PETSC_NULL) { 1801119a934aSSatish Balay if (nz == PETSC_DEFAULT) nz = 5; 18022593348eSBarry Smith else if (nz <= 0) nz = 1; 1803b6490206SBarry Smith for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1804b6490206SBarry Smith nz = nz*mbs; 18052593348eSBarry Smith } 18062593348eSBarry Smith else { 18072593348eSBarry Smith nz = 0; 1808b6490206SBarry Smith for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 18092593348eSBarry Smith } 18102593348eSBarry Smith 18112593348eSBarry Smith /* allocate the matrix space */ 18127e67e3f9SSatish Balay len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 18132593348eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 18147e67e3f9SSatish Balay PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 18157e67e3f9SSatish Balay b->j = (int *) (b->a + nz*bs2); 18162593348eSBarry Smith PetscMemzero(b->j,nz*sizeof(int)); 18172593348eSBarry Smith b->i = b->j + nz; 18182593348eSBarry Smith b->singlemalloc = 1; 18192593348eSBarry Smith 1820de6a44a3SBarry Smith b->i[0] = 0; 1821b6490206SBarry Smith for (i=1; i<mbs+1; i++) { 18222593348eSBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 18232593348eSBarry Smith } 18242593348eSBarry Smith 1825b6490206SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 1826b6490206SBarry Smith b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1827b6490206SBarry Smith PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1828b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 18292593348eSBarry Smith 1830b6490206SBarry Smith b->bs = bs; 18319df24120SSatish Balay b->bs2 = bs2; 1832b6490206SBarry Smith b->mbs = mbs; 18332593348eSBarry Smith b->nz = 0; 183418e7b8e4SLois Curfman McInnes b->maxnz = nz*bs2; 18352593348eSBarry Smith b->sorted = 0; 18362593348eSBarry Smith b->roworiented = 1; 18372593348eSBarry Smith b->nonew = 0; 18382593348eSBarry Smith b->diag = 0; 18392593348eSBarry Smith b->solve_work = 0; 1840de6a44a3SBarry Smith b->mult_work = 0; 18412593348eSBarry Smith b->spptr = 0; 18424e220ebcSLois Curfman McInnes B->info.nz_unneeded = (double)b->maxnz; 18434e220ebcSLois Curfman McInnes 18442593348eSBarry Smith *A = B; 18452593348eSBarry Smith ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 18462593348eSBarry Smith if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 18472593348eSBarry Smith return 0; 18482593348eSBarry Smith } 18492593348eSBarry Smith 1850b6490206SBarry Smith int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 18512593348eSBarry Smith { 18522593348eSBarry Smith Mat C; 1853b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 18549df24120SSatish Balay int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1855de6a44a3SBarry Smith 1856de6a44a3SBarry Smith if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 18572593348eSBarry Smith 18582593348eSBarry Smith *B = 0; 1859b6490206SBarry Smith PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 18602593348eSBarry Smith PLogObjectCreate(C); 1861b6490206SBarry Smith C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 18622593348eSBarry Smith PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1863b6490206SBarry Smith C->destroy = MatDestroy_SeqBAIJ; 1864b6490206SBarry Smith C->view = MatView_SeqBAIJ; 18652593348eSBarry Smith C->factor = A->factor; 18662593348eSBarry Smith c->row = 0; 18672593348eSBarry Smith c->col = 0; 18682593348eSBarry Smith C->assembled = PETSC_TRUE; 18692593348eSBarry Smith 187044cd7ae7SLois Curfman McInnes c->m = C->m = a->m; 187144cd7ae7SLois Curfman McInnes c->n = C->n = a->n; 187244cd7ae7SLois Curfman McInnes C->M = a->m; 187344cd7ae7SLois Curfman McInnes C->N = a->n; 187444cd7ae7SLois Curfman McInnes 1875b6490206SBarry Smith c->bs = a->bs; 18769df24120SSatish Balay c->bs2 = a->bs2; 1877b6490206SBarry Smith c->mbs = a->mbs; 1878b6490206SBarry Smith c->nbs = a->nbs; 18792593348eSBarry Smith 1880b6490206SBarry Smith c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1881b6490206SBarry Smith c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1882b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 18832593348eSBarry Smith c->imax[i] = a->imax[i]; 18842593348eSBarry Smith c->ilen[i] = a->ilen[i]; 18852593348eSBarry Smith } 18862593348eSBarry Smith 18872593348eSBarry Smith /* allocate the matrix space */ 18882593348eSBarry Smith c->singlemalloc = 1; 18897e67e3f9SSatish Balay len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 18902593348eSBarry Smith c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 18917e67e3f9SSatish Balay c->j = (int *) (c->a + nz*bs2); 1892de6a44a3SBarry Smith c->i = c->j + nz; 1893b6490206SBarry Smith PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1894b6490206SBarry Smith if (mbs > 0) { 1895de6a44a3SBarry Smith PetscMemcpy(c->j,a->j,nz*sizeof(int)); 18962593348eSBarry Smith if (cpvalues == COPY_VALUES) { 18977e67e3f9SSatish Balay PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 18982593348eSBarry Smith } 18992593348eSBarry Smith } 19002593348eSBarry Smith 1901b6490206SBarry Smith PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 19022593348eSBarry Smith c->sorted = a->sorted; 19032593348eSBarry Smith c->roworiented = a->roworiented; 19042593348eSBarry Smith c->nonew = a->nonew; 19052593348eSBarry Smith 19062593348eSBarry Smith if (a->diag) { 1907b6490206SBarry Smith c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1908b6490206SBarry Smith PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1909b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 19102593348eSBarry Smith c->diag[i] = a->diag[i]; 19112593348eSBarry Smith } 19122593348eSBarry Smith } 19132593348eSBarry Smith else c->diag = 0; 19142593348eSBarry Smith c->nz = a->nz; 19152593348eSBarry Smith c->maxnz = a->maxnz; 19162593348eSBarry Smith c->solve_work = 0; 19172593348eSBarry Smith c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 19187fc0212eSBarry Smith c->mult_work = 0; 19192593348eSBarry Smith *B = C; 19202593348eSBarry Smith return 0; 19212593348eSBarry Smith } 19222593348eSBarry Smith 192319bcc07fSBarry Smith int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 19242593348eSBarry Smith { 1925b6490206SBarry Smith Mat_SeqBAIJ *a; 19262593348eSBarry Smith Mat B; 1927de6a44a3SBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1928b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 192935aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1930a7c10996SSatish Balay int *masked, nmask,tmp,bs2,ishift; 1931b6490206SBarry Smith Scalar *aa; 193219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject) viewer)->comm; 19332593348eSBarry Smith 19341a6a6d98SBarry Smith ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1935de6a44a3SBarry Smith bs2 = bs*bs; 1936b6490206SBarry Smith 19372593348eSBarry Smith MPI_Comm_size(comm,&size); 1938b6490206SBarry Smith if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 193990ace30eSBarry Smith ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 194077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1941de6a44a3SBarry Smith if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 19422593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 19432593348eSBarry Smith 1944b6490206SBarry Smith if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 194535aab85fSBarry Smith 194635aab85fSBarry Smith /* 194735aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 194835aab85fSBarry Smith divisible by the blocksize 194935aab85fSBarry Smith */ 1950b6490206SBarry Smith mbs = M/bs; 195135aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 195235aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 195335aab85fSBarry Smith else mbs++; 195435aab85fSBarry Smith if (extra_rows) { 1955537820f0SBarry Smith PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 195635aab85fSBarry Smith } 1957b6490206SBarry Smith 19582593348eSBarry Smith /* read in row lengths */ 195935aab85fSBarry Smith rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 196077c4ece6SBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 196135aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 19622593348eSBarry Smith 1963b6490206SBarry Smith /* read in column indices */ 196435aab85fSBarry Smith jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 196577c4ece6SBarry Smith ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 196635aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1967b6490206SBarry Smith 1968b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1969b6490206SBarry Smith browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1970b6490206SBarry Smith PetscMemzero(browlengths,mbs*sizeof(int)); 197135aab85fSBarry Smith mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 197235aab85fSBarry Smith PetscMemzero(mask,mbs*sizeof(int)); 197335aab85fSBarry Smith masked = mask + mbs; 1974b6490206SBarry Smith rowcount = 0; nzcount = 0; 1975b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 197635aab85fSBarry Smith nmask = 0; 1977b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 1978b6490206SBarry Smith kmax = rowlengths[rowcount]; 1979b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 198035aab85fSBarry Smith tmp = jj[nzcount++]/bs; 198135aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1982b6490206SBarry Smith } 1983b6490206SBarry Smith rowcount++; 1984b6490206SBarry Smith } 198535aab85fSBarry Smith browlengths[i] += nmask; 198635aab85fSBarry Smith /* zero out the mask elements we set */ 198735aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1988b6490206SBarry Smith } 1989b6490206SBarry Smith 19902593348eSBarry Smith /* create our matrix */ 199135aab85fSBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 199235aab85fSBarry Smith CHKERRQ(ierr); 19932593348eSBarry Smith B = *A; 1994b6490206SBarry Smith a = (Mat_SeqBAIJ *) B->data; 19952593348eSBarry Smith 19962593348eSBarry Smith /* set matrix "i" values */ 1997de6a44a3SBarry Smith a->i[0] = 0; 1998b6490206SBarry Smith for ( i=1; i<= mbs; i++ ) { 1999b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 2000b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 20012593348eSBarry Smith } 2002b6490206SBarry Smith a->nz = 0; 2003b6490206SBarry Smith for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 20042593348eSBarry Smith 2005b6490206SBarry Smith /* read in nonzero values */ 200635aab85fSBarry Smith aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 200777c4ece6SBarry Smith ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 200835aab85fSBarry Smith for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2009b6490206SBarry Smith 2010b6490206SBarry Smith /* set "a" and "j" values into matrix */ 2011b6490206SBarry Smith nzcount = 0; jcount = 0; 2012b6490206SBarry Smith for ( i=0; i<mbs; i++ ) { 2013b6490206SBarry Smith nzcountb = nzcount; 201435aab85fSBarry Smith nmask = 0; 2015b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2016b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2017b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 201835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 201935aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2020b6490206SBarry Smith } 2021b6490206SBarry Smith rowcount++; 2022b6490206SBarry Smith } 2023de6a44a3SBarry Smith /* sort the masked values */ 202477c4ece6SBarry Smith PetscSortInt(nmask,masked); 2025de6a44a3SBarry Smith 2026b6490206SBarry Smith /* set "j" values into matrix */ 2027b6490206SBarry Smith maskcount = 1; 202835aab85fSBarry Smith for ( j=0; j<nmask; j++ ) { 202935aab85fSBarry Smith a->j[jcount++] = masked[j]; 2030de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 2031b6490206SBarry Smith } 2032b6490206SBarry Smith /* set "a" values into matrix */ 2033de6a44a3SBarry Smith ishift = bs2*a->i[i]; 2034b6490206SBarry Smith for ( j=0; j<bs; j++ ) { 2035b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 2036b6490206SBarry Smith for ( k=0; k<kmax; k++ ) { 2037de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 2038de6a44a3SBarry Smith block = mask[tmp] - 1; 2039de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 2040de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 2041b6490206SBarry Smith a->a[idx] = aa[nzcountb++]; 2042b6490206SBarry Smith } 2043b6490206SBarry Smith } 204435aab85fSBarry Smith /* zero out the mask elements we set */ 204535aab85fSBarry Smith for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2046b6490206SBarry Smith } 204735aab85fSBarry Smith if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2048b6490206SBarry Smith 2049b6490206SBarry Smith PetscFree(rowlengths); 2050b6490206SBarry Smith PetscFree(browlengths); 2051b6490206SBarry Smith PetscFree(aa); 2052b6490206SBarry Smith PetscFree(jj); 2053b6490206SBarry Smith PetscFree(mask); 2054b6490206SBarry Smith 2055b6490206SBarry Smith B->assembled = PETSC_TRUE; 2056de6a44a3SBarry Smith 2057de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2058de6a44a3SBarry Smith if (flg) { 205919bcc07fSBarry Smith Viewer tviewer; 206019bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2061639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 206219bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 206319bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2064de6a44a3SBarry Smith } 2065de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2066de6a44a3SBarry Smith if (flg) { 206719bcc07fSBarry Smith Viewer tviewer; 206819bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2069639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 207019bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 207119bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2072de6a44a3SBarry Smith } 2073de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2074de6a44a3SBarry Smith if (flg) { 207519bcc07fSBarry Smith Viewer tviewer; 207619bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 207719bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 207819bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2079de6a44a3SBarry Smith } 2080de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2081de6a44a3SBarry Smith if (flg) { 208219bcc07fSBarry Smith Viewer tviewer; 208319bcc07fSBarry Smith ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2084639f9d9dSBarry Smith ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 208519bcc07fSBarry Smith ierr = MatView(B,tviewer); CHKERRQ(ierr); 208619bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2087de6a44a3SBarry Smith } 2088de6a44a3SBarry Smith ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2089de6a44a3SBarry Smith if (flg) { 209019bcc07fSBarry Smith Viewer tviewer; 209119bcc07fSBarry Smith ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 209219bcc07fSBarry Smith ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 209319bcc07fSBarry Smith ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 209419bcc07fSBarry Smith ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2095de6a44a3SBarry Smith } 20962593348eSBarry Smith return 0; 20972593348eSBarry Smith } 20982593348eSBarry Smith 20992593348eSBarry Smith 21002593348eSBarry Smith 2101