128e30874SSatish Balay #include <../src/mat/impls/baij/seq/baij.h> 2*af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 328e30874SSatish Balay 428e30874SSatish Balay #undef __FUNCT__ 528e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_N_inplace" 628e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 728e30874SSatish Balay { 828e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 928e30874SSatish Balay IS iscol=a->col,isrow=a->row; 1028e30874SSatish Balay PetscErrorCode ierr; 1128e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 1228e30874SSatish Balay const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*vi; 1328e30874SSatish Balay PetscInt i,nz; 1428e30874SSatish Balay const PetscInt bs =A->rmap->bs,bs2=a->bs2; 1528e30874SSatish Balay const MatScalar *aa=a->a,*v; 1628e30874SSatish Balay PetscScalar *x,*s,*t,*ls; 1728e30874SSatish Balay const PetscScalar *b; 1828e30874SSatish Balay 1928e30874SSatish Balay PetscFunctionBegin; 2028e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2128e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2228e30874SSatish Balay t = a->solve_work; 2328e30874SSatish Balay 2428e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 2528e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 2628e30874SSatish Balay 2728e30874SSatish Balay /* forward solve the lower triangular */ 2828e30874SSatish Balay ierr = PetscMemcpy(t,b+bs*(*r++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 2928e30874SSatish Balay for (i=1; i<n; i++) { 3028e30874SSatish Balay v = aa + bs2*ai[i]; 3128e30874SSatish Balay vi = aj + ai[i]; 3228e30874SSatish Balay nz = a->diag[i] - ai[i]; 3328e30874SSatish Balay s = t + bs*i; 3428e30874SSatish Balay ierr = PetscMemcpy(s,b+bs*(*r++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 3528e30874SSatish Balay while (nz--) { 3628e30874SSatish Balay PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*(*vi++)); 3728e30874SSatish Balay v += bs2; 3828e30874SSatish Balay } 3928e30874SSatish Balay } 4028e30874SSatish Balay /* backward solve the upper triangular */ 4128e30874SSatish Balay ls = a->solve_work + A->cmap->n; 4228e30874SSatish Balay for (i=n-1; i>=0; i--) { 4328e30874SSatish Balay v = aa + bs2*(a->diag[i] + 1); 4428e30874SSatish Balay vi = aj + a->diag[i] + 1; 4528e30874SSatish Balay nz = ai[i+1] - a->diag[i] - 1; 4628e30874SSatish Balay ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 4728e30874SSatish Balay while (nz--) { 4828e30874SSatish Balay PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*(*vi++)); 4928e30874SSatish Balay v += bs2; 5028e30874SSatish Balay } 5128e30874SSatish Balay PetscKernel_w_gets_A_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs); 5228e30874SSatish Balay ierr = PetscMemcpy(x + bs*(*c--),t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 5328e30874SSatish Balay } 5428e30874SSatish Balay 5528e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 5628e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 5728e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 5828e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5928e30874SSatish Balay ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 6028e30874SSatish Balay PetscFunctionReturn(0); 6128e30874SSatish Balay } 6228e30874SSatish Balay 6328e30874SSatish Balay #undef __FUNCT__ 6428e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_7_inplace" 6528e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_7_inplace(Mat A,Vec bb,Vec xx) 6628e30874SSatish Balay { 6728e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 6828e30874SSatish Balay IS iscol=a->col,isrow=a->row; 6928e30874SSatish Balay PetscErrorCode ierr; 7028e30874SSatish Balay const PetscInt *r,*c,*ai=a->i,*aj=a->j; 7128e30874SSatish Balay const PetscInt *rout,*cout,*diag = a->diag,*vi,n=a->mbs; 7228e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 7328e30874SSatish Balay const MatScalar *aa=a->a,*v; 7428e30874SSatish Balay PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x,*t; 7528e30874SSatish Balay const PetscScalar *b; 7628e30874SSatish Balay 7728e30874SSatish Balay PetscFunctionBegin; 7828e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 7928e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8028e30874SSatish Balay t = a->solve_work; 8128e30874SSatish Balay 8228e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 8328e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 8428e30874SSatish Balay 8528e30874SSatish Balay /* forward solve the lower triangular */ 8628e30874SSatish Balay idx = 7*(*r++); 8728e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 8828e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 8928e30874SSatish Balay t[5] = b[5+idx]; t[6] = b[6+idx]; 9028e30874SSatish Balay 9128e30874SSatish Balay for (i=1; i<n; i++) { 9228e30874SSatish Balay v = aa + 49*ai[i]; 9328e30874SSatish Balay vi = aj + ai[i]; 9428e30874SSatish Balay nz = diag[i] - ai[i]; 9528e30874SSatish Balay idx = 7*(*r++); 9628e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 9728e30874SSatish Balay s5 = b[4+idx];s6 = b[5+idx];s7 = b[6+idx]; 9828e30874SSatish Balay while (nz--) { 9928e30874SSatish Balay idx = 7*(*vi++); 10028e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 10128e30874SSatish Balay x4 = t[3+idx];x5 = t[4+idx]; 10228e30874SSatish Balay x6 = t[5+idx];x7 = t[6+idx]; 10328e30874SSatish Balay s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 10428e30874SSatish Balay s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 10528e30874SSatish Balay s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 10628e30874SSatish Balay s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 10728e30874SSatish Balay s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 10828e30874SSatish Balay s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 10928e30874SSatish Balay s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 11028e30874SSatish Balay v += 49; 11128e30874SSatish Balay } 11228e30874SSatish Balay idx = 7*i; 11328e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 11428e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 11528e30874SSatish Balay t[5+idx] = s6;t[6+idx] = s7; 11628e30874SSatish Balay } 11728e30874SSatish Balay /* backward solve the upper triangular */ 11828e30874SSatish Balay for (i=n-1; i>=0; i--) { 11928e30874SSatish Balay v = aa + 49*diag[i] + 49; 12028e30874SSatish Balay vi = aj + diag[i] + 1; 12128e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 12228e30874SSatish Balay idt = 7*i; 12328e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 12428e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 12528e30874SSatish Balay s6 = t[5+idt];s7 = t[6+idt]; 12628e30874SSatish Balay while (nz--) { 12728e30874SSatish Balay idx = 7*(*vi++); 12828e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 12928e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 13028e30874SSatish Balay x6 = t[5+idx]; x7 = t[6+idx]; 13128e30874SSatish Balay s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 13228e30874SSatish Balay s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 13328e30874SSatish Balay s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 13428e30874SSatish Balay s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 13528e30874SSatish Balay s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 13628e30874SSatish Balay s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 13728e30874SSatish Balay s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 13828e30874SSatish Balay v += 49; 13928e30874SSatish Balay } 14028e30874SSatish Balay idc = 7*(*c--); 14128e30874SSatish Balay v = aa + 49*diag[i]; 14228e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[7]*s2+v[14]*s3+ 14328e30874SSatish Balay v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7; 14428e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+ 14528e30874SSatish Balay v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7; 14628e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+ 14728e30874SSatish Balay v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7; 14828e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+ 14928e30874SSatish Balay v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7; 15028e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+ 15128e30874SSatish Balay v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7; 15228e30874SSatish Balay x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+ 15328e30874SSatish Balay v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7; 15428e30874SSatish Balay x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+ 15528e30874SSatish Balay v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7; 15628e30874SSatish Balay } 15728e30874SSatish Balay 15828e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 15928e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 16028e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 16128e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 16228e30874SSatish Balay ierr = PetscLogFlops(2.0*49*(a->nz) - 7.0*A->cmap->n);CHKERRQ(ierr); 16328e30874SSatish Balay PetscFunctionReturn(0); 16428e30874SSatish Balay } 16528e30874SSatish Balay 16628e30874SSatish Balay #undef __FUNCT__ 16728e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_7" 16828e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 16928e30874SSatish Balay { 17028e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 17128e30874SSatish Balay IS iscol=a->col,isrow=a->row; 17228e30874SSatish Balay PetscErrorCode ierr; 17328e30874SSatish Balay const PetscInt *r,*c,*ai=a->i,*aj=a->j,*adiag=a->diag; 17428e30874SSatish Balay const PetscInt n=a->mbs,*rout,*cout,*vi; 17528e30874SSatish Balay PetscInt i,nz,idx,idt,idc,m; 17628e30874SSatish Balay const MatScalar *aa=a->a,*v; 17728e30874SSatish Balay PetscScalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7,*x,*t; 17828e30874SSatish Balay const PetscScalar *b; 17928e30874SSatish Balay 18028e30874SSatish Balay PetscFunctionBegin; 18128e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 18228e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 18328e30874SSatish Balay t = a->solve_work; 18428e30874SSatish Balay 18528e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 18628e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 18728e30874SSatish Balay 18828e30874SSatish Balay /* forward solve the lower triangular */ 18928e30874SSatish Balay idx = 7*r[0]; 19028e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 19128e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 19228e30874SSatish Balay t[5] = b[5+idx]; t[6] = b[6+idx]; 19328e30874SSatish Balay 19428e30874SSatish Balay for (i=1; i<n; i++) { 19528e30874SSatish Balay v = aa + 49*ai[i]; 19628e30874SSatish Balay vi = aj + ai[i]; 19728e30874SSatish Balay nz = ai[i+1] - ai[i]; 19828e30874SSatish Balay idx = 7*r[i]; 19928e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 20028e30874SSatish Balay s5 = b[4+idx];s6 = b[5+idx];s7 = b[6+idx]; 20128e30874SSatish Balay for (m=0; m<nz; m++) { 20228e30874SSatish Balay idx = 7*vi[m]; 20328e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 20428e30874SSatish Balay x4 = t[3+idx];x5 = t[4+idx]; 20528e30874SSatish Balay x6 = t[5+idx];x7 = t[6+idx]; 20628e30874SSatish Balay s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 20728e30874SSatish Balay s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 20828e30874SSatish Balay s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 20928e30874SSatish Balay s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 21028e30874SSatish Balay s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 21128e30874SSatish Balay s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 21228e30874SSatish Balay s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 21328e30874SSatish Balay v += 49; 21428e30874SSatish Balay } 21528e30874SSatish Balay idx = 7*i; 21628e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 21728e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 21828e30874SSatish Balay t[5+idx] = s6;t[6+idx] = s7; 21928e30874SSatish Balay } 22028e30874SSatish Balay /* backward solve the upper triangular */ 22128e30874SSatish Balay for (i=n-1; i>=0; i--) { 22228e30874SSatish Balay v = aa + 49*(adiag[i+1]+1); 22328e30874SSatish Balay vi = aj + adiag[i+1]+1; 22428e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 22528e30874SSatish Balay idt = 7*i; 22628e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 22728e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 22828e30874SSatish Balay s6 = t[5+idt];s7 = t[6+idt]; 22928e30874SSatish Balay for (m=0; m<nz; m++) { 23028e30874SSatish Balay idx = 7*vi[m]; 23128e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 23228e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 23328e30874SSatish Balay x6 = t[5+idx]; x7 = t[6+idx]; 23428e30874SSatish Balay s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 23528e30874SSatish Balay s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 23628e30874SSatish Balay s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 23728e30874SSatish Balay s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 23828e30874SSatish Balay s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 23928e30874SSatish Balay s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 24028e30874SSatish Balay s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 24128e30874SSatish Balay v += 49; 24228e30874SSatish Balay } 24328e30874SSatish Balay idc = 7*c[i]; 24428e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[7]*s2+v[14]*s3+ 24528e30874SSatish Balay v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7; 24628e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+ 24728e30874SSatish Balay v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7; 24828e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+ 24928e30874SSatish Balay v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7; 25028e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+ 25128e30874SSatish Balay v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7; 25228e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+ 25328e30874SSatish Balay v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7; 25428e30874SSatish Balay x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+ 25528e30874SSatish Balay v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7; 25628e30874SSatish Balay x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+ 25728e30874SSatish Balay v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7; 25828e30874SSatish Balay } 25928e30874SSatish Balay 26028e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 26128e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 26228e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 26328e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 26428e30874SSatish Balay ierr = PetscLogFlops(2.0*49*(a->nz) - 7.0*A->cmap->n);CHKERRQ(ierr); 26528e30874SSatish Balay PetscFunctionReturn(0); 26628e30874SSatish Balay } 26728e30874SSatish Balay 26828e30874SSatish Balay #undef __FUNCT__ 26928e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_6_inplace" 27028e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_6_inplace(Mat A,Vec bb,Vec xx) 27128e30874SSatish Balay { 27228e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 27328e30874SSatish Balay IS iscol=a->col,isrow=a->row; 27428e30874SSatish Balay PetscErrorCode ierr; 27528e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 27628e30874SSatish Balay const PetscInt *diag = a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 27728e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 27828e30874SSatish Balay const MatScalar *aa=a->a,*v; 27928e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t; 28028e30874SSatish Balay const PetscScalar *b; 28128e30874SSatish Balay 28228e30874SSatish Balay PetscFunctionBegin; 28328e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 28428e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 28528e30874SSatish Balay t = a->solve_work; 28628e30874SSatish Balay 28728e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 28828e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 28928e30874SSatish Balay 29028e30874SSatish Balay /* forward solve the lower triangular */ 29128e30874SSatish Balay idx = 6*(*r++); 29228e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 29328e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; 29428e30874SSatish Balay t[4] = b[4+idx]; t[5] = b[5+idx]; 29528e30874SSatish Balay for (i=1; i<n; i++) { 29628e30874SSatish Balay v = aa + 36*ai[i]; 29728e30874SSatish Balay vi = aj + ai[i]; 29828e30874SSatish Balay nz = diag[i] - ai[i]; 29928e30874SSatish Balay idx = 6*(*r++); 30028e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 30128e30874SSatish Balay s5 = b[4+idx]; s6 = b[5+idx]; 30228e30874SSatish Balay while (nz--) { 30328e30874SSatish Balay idx = 6*(*vi++); 30428e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 30528e30874SSatish Balay x4 = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx]; 30628e30874SSatish Balay s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 30728e30874SSatish Balay s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 30828e30874SSatish Balay s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 30928e30874SSatish Balay s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 31028e30874SSatish Balay s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 31128e30874SSatish Balay s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 31228e30874SSatish Balay v += 36; 31328e30874SSatish Balay } 31428e30874SSatish Balay idx = 6*i; 31528e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 31628e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; 31728e30874SSatish Balay t[4+idx] = s5;t[5+idx] = s6; 31828e30874SSatish Balay } 31928e30874SSatish Balay /* backward solve the upper triangular */ 32028e30874SSatish Balay for (i=n-1; i>=0; i--) { 32128e30874SSatish Balay v = aa + 36*diag[i] + 36; 32228e30874SSatish Balay vi = aj + diag[i] + 1; 32328e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 32428e30874SSatish Balay idt = 6*i; 32528e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 32628e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; 32728e30874SSatish Balay s5 = t[4+idt];s6 = t[5+idt]; 32828e30874SSatish Balay while (nz--) { 32928e30874SSatish Balay idx = 6*(*vi++); 33028e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 33128e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; 33228e30874SSatish Balay x5 = t[4+idx]; x6 = t[5+idx]; 33328e30874SSatish Balay s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 33428e30874SSatish Balay s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 33528e30874SSatish Balay s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 33628e30874SSatish Balay s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 33728e30874SSatish Balay s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 33828e30874SSatish Balay s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 33928e30874SSatish Balay v += 36; 34028e30874SSatish Balay } 34128e30874SSatish Balay idc = 6*(*c--); 34228e30874SSatish Balay v = aa + 36*diag[i]; 34328e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[6]*s2+v[12]*s3+ 34428e30874SSatish Balay v[18]*s4+v[24]*s5+v[30]*s6; 34528e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+ 34628e30874SSatish Balay v[19]*s4+v[25]*s5+v[31]*s6; 34728e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+ 34828e30874SSatish Balay v[20]*s4+v[26]*s5+v[32]*s6; 34928e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+ 35028e30874SSatish Balay v[21]*s4+v[27]*s5+v[33]*s6; 35128e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+ 35228e30874SSatish Balay v[22]*s4+v[28]*s5+v[34]*s6; 35328e30874SSatish Balay x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+ 35428e30874SSatish Balay v[23]*s4+v[29]*s5+v[35]*s6; 35528e30874SSatish Balay } 35628e30874SSatish Balay 35728e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 35828e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 35928e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 36028e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 36128e30874SSatish Balay ierr = PetscLogFlops(2.0*36*(a->nz) - 6.0*A->cmap->n);CHKERRQ(ierr); 36228e30874SSatish Balay PetscFunctionReturn(0); 36328e30874SSatish Balay } 36428e30874SSatish Balay 36528e30874SSatish Balay #undef __FUNCT__ 36628e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_6" 36728e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 36828e30874SSatish Balay { 36928e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 37028e30874SSatish Balay IS iscol=a->col,isrow=a->row; 37128e30874SSatish Balay PetscErrorCode ierr; 37228e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 37328e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 37428e30874SSatish Balay PetscInt i,nz,idx,idt,idc,m; 37528e30874SSatish Balay const MatScalar *aa=a->a,*v; 37628e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t; 37728e30874SSatish Balay const PetscScalar *b; 37828e30874SSatish Balay 37928e30874SSatish Balay PetscFunctionBegin; 38028e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 38128e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 38228e30874SSatish Balay t = a->solve_work; 38328e30874SSatish Balay 38428e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 38528e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 38628e30874SSatish Balay 38728e30874SSatish Balay /* forward solve the lower triangular */ 38828e30874SSatish Balay idx = 6*r[0]; 38928e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 39028e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; 39128e30874SSatish Balay t[4] = b[4+idx]; t[5] = b[5+idx]; 39228e30874SSatish Balay for (i=1; i<n; i++) { 39328e30874SSatish Balay v = aa + 36*ai[i]; 39428e30874SSatish Balay vi = aj + ai[i]; 39528e30874SSatish Balay nz = ai[i+1] - ai[i]; 39628e30874SSatish Balay idx = 6*r[i]; 39728e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 39828e30874SSatish Balay s5 = b[4+idx]; s6 = b[5+idx]; 39928e30874SSatish Balay for (m=0; m<nz; m++) { 40028e30874SSatish Balay idx = 6*vi[m]; 40128e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 40228e30874SSatish Balay x4 = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx]; 40328e30874SSatish Balay s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 40428e30874SSatish Balay s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 40528e30874SSatish Balay s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 40628e30874SSatish Balay s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 40728e30874SSatish Balay s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 40828e30874SSatish Balay s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 40928e30874SSatish Balay v += 36; 41028e30874SSatish Balay } 41128e30874SSatish Balay idx = 6*i; 41228e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 41328e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; 41428e30874SSatish Balay t[4+idx] = s5;t[5+idx] = s6; 41528e30874SSatish Balay } 41628e30874SSatish Balay /* backward solve the upper triangular */ 41728e30874SSatish Balay for (i=n-1; i>=0; i--) { 41828e30874SSatish Balay v = aa + 36*(adiag[i+1]+1); 41928e30874SSatish Balay vi = aj + adiag[i+1]+1; 42028e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 42128e30874SSatish Balay idt = 6*i; 42228e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 42328e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; 42428e30874SSatish Balay s5 = t[4+idt];s6 = t[5+idt]; 42528e30874SSatish Balay for (m=0; m<nz; m++) { 42628e30874SSatish Balay idx = 6*vi[m]; 42728e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 42828e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; 42928e30874SSatish Balay x5 = t[4+idx]; x6 = t[5+idx]; 43028e30874SSatish Balay s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 43128e30874SSatish Balay s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 43228e30874SSatish Balay s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 43328e30874SSatish Balay s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 43428e30874SSatish Balay s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 43528e30874SSatish Balay s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 43628e30874SSatish Balay v += 36; 43728e30874SSatish Balay } 43828e30874SSatish Balay idc = 6*c[i]; 43928e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[6]*s2+v[12]*s3+ 44028e30874SSatish Balay v[18]*s4+v[24]*s5+v[30]*s6; 44128e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+ 44228e30874SSatish Balay v[19]*s4+v[25]*s5+v[31]*s6; 44328e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+ 44428e30874SSatish Balay v[20]*s4+v[26]*s5+v[32]*s6; 44528e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+ 44628e30874SSatish Balay v[21]*s4+v[27]*s5+v[33]*s6; 44728e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+ 44828e30874SSatish Balay v[22]*s4+v[28]*s5+v[34]*s6; 44928e30874SSatish Balay x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+ 45028e30874SSatish Balay v[23]*s4+v[29]*s5+v[35]*s6; 45128e30874SSatish Balay } 45228e30874SSatish Balay 45328e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 45428e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 45528e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 45628e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 45728e30874SSatish Balay ierr = PetscLogFlops(2.0*36*(a->nz) - 6.0*A->cmap->n);CHKERRQ(ierr); 45828e30874SSatish Balay PetscFunctionReturn(0); 45928e30874SSatish Balay } 46028e30874SSatish Balay 46128e30874SSatish Balay #undef __FUNCT__ 46228e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_5_inplace" 46328e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_5_inplace(Mat A,Vec bb,Vec xx) 46428e30874SSatish Balay { 46528e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 46628e30874SSatish Balay IS iscol=a->col,isrow=a->row; 46728e30874SSatish Balay PetscErrorCode ierr; 46828e30874SSatish Balay const PetscInt *r,*c,*rout,*cout,*diag = a->diag; 46928e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 47028e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 47128e30874SSatish Balay const MatScalar *aa=a->a,*v; 47228e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t; 47328e30874SSatish Balay const PetscScalar *b; 47428e30874SSatish Balay 47528e30874SSatish Balay PetscFunctionBegin; 47628e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 47728e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 47828e30874SSatish Balay t = a->solve_work; 47928e30874SSatish Balay 48028e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 48128e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 48228e30874SSatish Balay 48328e30874SSatish Balay /* forward solve the lower triangular */ 48428e30874SSatish Balay idx = 5*(*r++); 48528e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 48628e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 48728e30874SSatish Balay for (i=1; i<n; i++) { 48828e30874SSatish Balay v = aa + 25*ai[i]; 48928e30874SSatish Balay vi = aj + ai[i]; 49028e30874SSatish Balay nz = diag[i] - ai[i]; 49128e30874SSatish Balay idx = 5*(*r++); 49228e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 49328e30874SSatish Balay s5 = b[4+idx]; 49428e30874SSatish Balay while (nz--) { 49528e30874SSatish Balay idx = 5*(*vi++); 49628e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 49728e30874SSatish Balay x4 = t[3+idx];x5 = t[4+idx]; 49828e30874SSatish Balay s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 49928e30874SSatish Balay s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 50028e30874SSatish Balay s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 50128e30874SSatish Balay s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 50228e30874SSatish Balay s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 50328e30874SSatish Balay v += 25; 50428e30874SSatish Balay } 50528e30874SSatish Balay idx = 5*i; 50628e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 50728e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 50828e30874SSatish Balay } 50928e30874SSatish Balay /* backward solve the upper triangular */ 51028e30874SSatish Balay for (i=n-1; i>=0; i--) { 51128e30874SSatish Balay v = aa + 25*diag[i] + 25; 51228e30874SSatish Balay vi = aj + diag[i] + 1; 51328e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 51428e30874SSatish Balay idt = 5*i; 51528e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 51628e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 51728e30874SSatish Balay while (nz--) { 51828e30874SSatish Balay idx = 5*(*vi++); 51928e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 52028e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 52128e30874SSatish Balay s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 52228e30874SSatish Balay s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 52328e30874SSatish Balay s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 52428e30874SSatish Balay s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 52528e30874SSatish Balay s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 52628e30874SSatish Balay v += 25; 52728e30874SSatish Balay } 52828e30874SSatish Balay idc = 5*(*c--); 52928e30874SSatish Balay v = aa + 25*diag[i]; 53028e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[5]*s2+v[10]*s3+ 53128e30874SSatish Balay v[15]*s4+v[20]*s5; 53228e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+ 53328e30874SSatish Balay v[16]*s4+v[21]*s5; 53428e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+ 53528e30874SSatish Balay v[17]*s4+v[22]*s5; 53628e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+ 53728e30874SSatish Balay v[18]*s4+v[23]*s5; 53828e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+ 53928e30874SSatish Balay v[19]*s4+v[24]*s5; 54028e30874SSatish Balay } 54128e30874SSatish Balay 54228e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 54328e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 54428e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 54528e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 54628e30874SSatish Balay ierr = PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);CHKERRQ(ierr); 54728e30874SSatish Balay PetscFunctionReturn(0); 54828e30874SSatish Balay } 54928e30874SSatish Balay 55028e30874SSatish Balay #undef __FUNCT__ 55128e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_5" 55228e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 55328e30874SSatish Balay { 55428e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 55528e30874SSatish Balay IS iscol=a->col,isrow=a->row; 55628e30874SSatish Balay PetscErrorCode ierr; 55728e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 55828e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 55928e30874SSatish Balay PetscInt i,nz,idx,idt,idc,m; 56028e30874SSatish Balay const MatScalar *aa=a->a,*v; 56128e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t; 56228e30874SSatish Balay const PetscScalar *b; 56328e30874SSatish Balay 56428e30874SSatish Balay PetscFunctionBegin; 56528e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 56628e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 56728e30874SSatish Balay t = a->solve_work; 56828e30874SSatish Balay 56928e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 57028e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 57128e30874SSatish Balay 57228e30874SSatish Balay /* forward solve the lower triangular */ 57328e30874SSatish Balay idx = 5*r[0]; 57428e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 57528e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 57628e30874SSatish Balay for (i=1; i<n; i++) { 57728e30874SSatish Balay v = aa + 25*ai[i]; 57828e30874SSatish Balay vi = aj + ai[i]; 57928e30874SSatish Balay nz = ai[i+1] - ai[i]; 58028e30874SSatish Balay idx = 5*r[i]; 58128e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 58228e30874SSatish Balay s5 = b[4+idx]; 58328e30874SSatish Balay for (m=0; m<nz; m++) { 58428e30874SSatish Balay idx = 5*vi[m]; 58528e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 58628e30874SSatish Balay x4 = t[3+idx];x5 = t[4+idx]; 58728e30874SSatish Balay s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 58828e30874SSatish Balay s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 58928e30874SSatish Balay s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 59028e30874SSatish Balay s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 59128e30874SSatish Balay s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 59228e30874SSatish Balay v += 25; 59328e30874SSatish Balay } 59428e30874SSatish Balay idx = 5*i; 59528e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 59628e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 59728e30874SSatish Balay } 59828e30874SSatish Balay /* backward solve the upper triangular */ 59928e30874SSatish Balay for (i=n-1; i>=0; i--) { 60028e30874SSatish Balay v = aa + 25*(adiag[i+1]+1); 60128e30874SSatish Balay vi = aj + adiag[i+1]+1; 60228e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 60328e30874SSatish Balay idt = 5*i; 60428e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 60528e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 60628e30874SSatish Balay for (m=0; m<nz; m++) { 60728e30874SSatish Balay idx = 5*vi[m]; 60828e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 60928e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 61028e30874SSatish Balay s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 61128e30874SSatish Balay s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 61228e30874SSatish Balay s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 61328e30874SSatish Balay s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 61428e30874SSatish Balay s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 61528e30874SSatish Balay v += 25; 61628e30874SSatish Balay } 61728e30874SSatish Balay idc = 5*c[i]; 61828e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[5]*s2+v[10]*s3+ 61928e30874SSatish Balay v[15]*s4+v[20]*s5; 62028e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+ 62128e30874SSatish Balay v[16]*s4+v[21]*s5; 62228e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+ 62328e30874SSatish Balay v[17]*s4+v[22]*s5; 62428e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+ 62528e30874SSatish Balay v[18]*s4+v[23]*s5; 62628e30874SSatish Balay x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+ 62728e30874SSatish Balay v[19]*s4+v[24]*s5; 62828e30874SSatish Balay } 62928e30874SSatish Balay 63028e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 63128e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 63228e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 63328e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 63428e30874SSatish Balay ierr = PetscLogFlops(2.0*25*(a->nz) - 5.0*A->cmap->n);CHKERRQ(ierr); 63528e30874SSatish Balay PetscFunctionReturn(0); 63628e30874SSatish Balay } 63728e30874SSatish Balay 63828e30874SSatish Balay #undef __FUNCT__ 63928e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_4_inplace" 64028e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_4_inplace(Mat A,Vec bb,Vec xx) 64128e30874SSatish Balay { 64228e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 64328e30874SSatish Balay IS iscol=a->col,isrow=a->row; 64428e30874SSatish Balay PetscErrorCode ierr; 64528e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 64628e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 64728e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 64828e30874SSatish Balay const MatScalar *aa=a->a,*v; 64928e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,x1,x2,x3,x4,*t; 65028e30874SSatish Balay const PetscScalar *b; 65128e30874SSatish Balay 65228e30874SSatish Balay PetscFunctionBegin; 65328e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 65428e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 65528e30874SSatish Balay t = a->solve_work; 65628e30874SSatish Balay 65728e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 65828e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 65928e30874SSatish Balay 66028e30874SSatish Balay /* forward solve the lower triangular */ 66128e30874SSatish Balay idx = 4*(*r++); 66228e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 66328e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; 66428e30874SSatish Balay for (i=1; i<n; i++) { 66528e30874SSatish Balay v = aa + 16*ai[i]; 66628e30874SSatish Balay vi = aj + ai[i]; 66728e30874SSatish Balay nz = diag[i] - ai[i]; 66828e30874SSatish Balay idx = 4*(*r++); 66928e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 67028e30874SSatish Balay while (nz--) { 67128e30874SSatish Balay idx = 4*(*vi++); 67228e30874SSatish Balay x1 = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx]; 67328e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 67428e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 67528e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 67628e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 67728e30874SSatish Balay v += 16; 67828e30874SSatish Balay } 67928e30874SSatish Balay idx = 4*i; 68028e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 68128e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; 68228e30874SSatish Balay } 68328e30874SSatish Balay /* backward solve the upper triangular */ 68428e30874SSatish Balay for (i=n-1; i>=0; i--) { 68528e30874SSatish Balay v = aa + 16*diag[i] + 16; 68628e30874SSatish Balay vi = aj + diag[i] + 1; 68728e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 68828e30874SSatish Balay idt = 4*i; 68928e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 69028e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; 69128e30874SSatish Balay while (nz--) { 69228e30874SSatish Balay idx = 4*(*vi++); 69328e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 69428e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; 69528e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 69628e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 69728e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 69828e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 69928e30874SSatish Balay v += 16; 70028e30874SSatish Balay } 70128e30874SSatish Balay idc = 4*(*c--); 70228e30874SSatish Balay v = aa + 16*diag[i]; 70328e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4; 70428e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4; 70528e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4; 70628e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4; 70728e30874SSatish Balay } 70828e30874SSatish Balay 70928e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 71028e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 71128e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 71228e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 71328e30874SSatish Balay ierr = PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);CHKERRQ(ierr); 71428e30874SSatish Balay PetscFunctionReturn(0); 71528e30874SSatish Balay } 71628e30874SSatish Balay 71728e30874SSatish Balay #undef __FUNCT__ 71828e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_4" 71928e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 72028e30874SSatish Balay { 72128e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 72228e30874SSatish Balay IS iscol=a->col,isrow=a->row; 72328e30874SSatish Balay PetscErrorCode ierr; 72428e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 72528e30874SSatish Balay PetscInt i,nz,idx,idt,idc,m; 72628e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 72728e30874SSatish Balay const MatScalar *aa=a->a,*v; 72828e30874SSatish Balay PetscScalar *x,s1,s2,s3,s4,x1,x2,x3,x4,*t; 72928e30874SSatish Balay const PetscScalar *b; 73028e30874SSatish Balay 73128e30874SSatish Balay PetscFunctionBegin; 73228e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 73328e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 73428e30874SSatish Balay t = a->solve_work; 73528e30874SSatish Balay 73628e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 73728e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 73828e30874SSatish Balay 73928e30874SSatish Balay /* forward solve the lower triangular */ 74028e30874SSatish Balay idx = 4*r[0]; 74128e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 74228e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; 74328e30874SSatish Balay for (i=1; i<n; i++) { 74428e30874SSatish Balay v = aa + 16*ai[i]; 74528e30874SSatish Balay vi = aj + ai[i]; 74628e30874SSatish Balay nz = ai[i+1] - ai[i]; 74728e30874SSatish Balay idx = 4*r[i]; 74828e30874SSatish Balay s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 74928e30874SSatish Balay for (m=0; m<nz; m++) { 75028e30874SSatish Balay idx = 4*vi[m]; 75128e30874SSatish Balay x1 = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx]; 75228e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 75328e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 75428e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 75528e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 75628e30874SSatish Balay v += 16; 75728e30874SSatish Balay } 75828e30874SSatish Balay idx = 4*i; 75928e30874SSatish Balay t[idx] = s1;t[1+idx] = s2; 76028e30874SSatish Balay t[2+idx] = s3;t[3+idx] = s4; 76128e30874SSatish Balay } 76228e30874SSatish Balay /* backward solve the upper triangular */ 76328e30874SSatish Balay for (i=n-1; i>=0; i--) { 76428e30874SSatish Balay v = aa + 16*(adiag[i+1]+1); 76528e30874SSatish Balay vi = aj + adiag[i+1]+1; 76628e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 76728e30874SSatish Balay idt = 4*i; 76828e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 76928e30874SSatish Balay s3 = t[2+idt];s4 = t[3+idt]; 77028e30874SSatish Balay for (m=0; m<nz; m++) { 77128e30874SSatish Balay idx = 4*vi[m]; 77228e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 77328e30874SSatish Balay x3 = t[2+idx]; x4 = t[3+idx]; 77428e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 77528e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 77628e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 77728e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 77828e30874SSatish Balay v += 16; 77928e30874SSatish Balay } 78028e30874SSatish Balay idc = 4*c[i]; 78128e30874SSatish Balay x[idc] = t[idt] = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4; 78228e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4; 78328e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4; 78428e30874SSatish Balay x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4; 78528e30874SSatish Balay } 78628e30874SSatish Balay 78728e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 78828e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 78928e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 79028e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 79128e30874SSatish Balay ierr = PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);CHKERRQ(ierr); 79228e30874SSatish Balay PetscFunctionReturn(0); 79328e30874SSatish Balay } 79428e30874SSatish Balay 79528e30874SSatish Balay #undef __FUNCT__ 79628e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_4_Demotion" 79728e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_4_Demotion(Mat A,Vec bb,Vec xx) 79828e30874SSatish Balay { 79928e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 80028e30874SSatish Balay IS iscol=a->col,isrow=a->row; 80128e30874SSatish Balay PetscErrorCode ierr; 80228e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 80328e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 80428e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 80528e30874SSatish Balay const MatScalar *aa=a->a,*v; 80628e30874SSatish Balay MatScalar s1,s2,s3,s4,x1,x2,x3,x4,*t; 80728e30874SSatish Balay PetscScalar *x; 80828e30874SSatish Balay const PetscScalar *b; 80928e30874SSatish Balay 81028e30874SSatish Balay PetscFunctionBegin; 81128e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 81228e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 81328e30874SSatish Balay t = (MatScalar*)a->solve_work; 81428e30874SSatish Balay 81528e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 81628e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 81728e30874SSatish Balay 81828e30874SSatish Balay /* forward solve the lower triangular */ 81928e30874SSatish Balay idx = 4*(*r++); 82028e30874SSatish Balay t[0] = (MatScalar)b[idx]; 82128e30874SSatish Balay t[1] = (MatScalar)b[1+idx]; 82228e30874SSatish Balay t[2] = (MatScalar)b[2+idx]; 82328e30874SSatish Balay t[3] = (MatScalar)b[3+idx]; 82428e30874SSatish Balay for (i=1; i<n; i++) { 82528e30874SSatish Balay v = aa + 16*ai[i]; 82628e30874SSatish Balay vi = aj + ai[i]; 82728e30874SSatish Balay nz = diag[i] - ai[i]; 82828e30874SSatish Balay idx = 4*(*r++); 82928e30874SSatish Balay s1 = (MatScalar)b[idx]; 83028e30874SSatish Balay s2 = (MatScalar)b[1+idx]; 83128e30874SSatish Balay s3 = (MatScalar)b[2+idx]; 83228e30874SSatish Balay s4 = (MatScalar)b[3+idx]; 83328e30874SSatish Balay while (nz--) { 83428e30874SSatish Balay idx = 4*(*vi++); 83528e30874SSatish Balay x1 = t[idx]; 83628e30874SSatish Balay x2 = t[1+idx]; 83728e30874SSatish Balay x3 = t[2+idx]; 83828e30874SSatish Balay x4 = t[3+idx]; 83928e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 84028e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 84128e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 84228e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 84328e30874SSatish Balay v += 16; 84428e30874SSatish Balay } 84528e30874SSatish Balay idx = 4*i; 84628e30874SSatish Balay t[idx] = s1; 84728e30874SSatish Balay t[1+idx] = s2; 84828e30874SSatish Balay t[2+idx] = s3; 84928e30874SSatish Balay t[3+idx] = s4; 85028e30874SSatish Balay } 85128e30874SSatish Balay /* backward solve the upper triangular */ 85228e30874SSatish Balay for (i=n-1; i>=0; i--) { 85328e30874SSatish Balay v = aa + 16*diag[i] + 16; 85428e30874SSatish Balay vi = aj + diag[i] + 1; 85528e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 85628e30874SSatish Balay idt = 4*i; 85728e30874SSatish Balay s1 = t[idt]; 85828e30874SSatish Balay s2 = t[1+idt]; 85928e30874SSatish Balay s3 = t[2+idt]; 86028e30874SSatish Balay s4 = t[3+idt]; 86128e30874SSatish Balay while (nz--) { 86228e30874SSatish Balay idx = 4*(*vi++); 86328e30874SSatish Balay x1 = t[idx]; 86428e30874SSatish Balay x2 = t[1+idx]; 86528e30874SSatish Balay x3 = t[2+idx]; 86628e30874SSatish Balay x4 = t[3+idx]; 86728e30874SSatish Balay s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 86828e30874SSatish Balay s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 86928e30874SSatish Balay s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 87028e30874SSatish Balay s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 87128e30874SSatish Balay v += 16; 87228e30874SSatish Balay } 87328e30874SSatish Balay idc = 4*(*c--); 87428e30874SSatish Balay v = aa + 16*diag[i]; 87528e30874SSatish Balay t[idt] = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4; 87628e30874SSatish Balay t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4; 87728e30874SSatish Balay t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4; 87828e30874SSatish Balay t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4; 87928e30874SSatish Balay x[idc] = (PetscScalar)t[idt]; 88028e30874SSatish Balay x[1+idc] = (PetscScalar)t[1+idt]; 88128e30874SSatish Balay x[2+idc] = (PetscScalar)t[2+idt]; 88228e30874SSatish Balay x[3+idc] = (PetscScalar)t[3+idt]; 88328e30874SSatish Balay } 88428e30874SSatish Balay 88528e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 88628e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 88728e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 88828e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 88928e30874SSatish Balay ierr = PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);CHKERRQ(ierr); 89028e30874SSatish Balay PetscFunctionReturn(0); 89128e30874SSatish Balay } 89228e30874SSatish Balay 89328e30874SSatish Balay #if defined(PETSC_HAVE_SSE) 89428e30874SSatish Balay 89528e30874SSatish Balay #include PETSC_HAVE_SSE 89628e30874SSatish Balay 89728e30874SSatish Balay #undef __FUNCT__ 89828e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_4_SSE_Demotion" 89928e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat A,Vec bb,Vec xx) 90028e30874SSatish Balay { 90128e30874SSatish Balay /* 90228e30874SSatish Balay Note: This code uses demotion of double 90328e30874SSatish Balay to float when performing the mixed-mode computation. 90428e30874SSatish Balay This may not be numerically reasonable for all applications. 90528e30874SSatish Balay */ 90628e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 90728e30874SSatish Balay IS iscol=a->col,isrow=a->row; 90828e30874SSatish Balay PetscErrorCode ierr; 90928e30874SSatish Balay PetscInt i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,ai16; 91028e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 91128e30874SSatish Balay MatScalar *aa=a->a,*v; 91228e30874SSatish Balay PetscScalar *x,*b,*t; 91328e30874SSatish Balay 91428e30874SSatish Balay /* Make space in temp stack for 16 Byte Aligned arrays */ 91528e30874SSatish Balay float ssealignedspace[11],*tmps,*tmpx; 91628e30874SSatish Balay unsigned long offset; 91728e30874SSatish Balay 91828e30874SSatish Balay PetscFunctionBegin; 91928e30874SSatish Balay SSE_SCOPE_BEGIN; 92028e30874SSatish Balay 92128e30874SSatish Balay offset = (unsigned long)ssealignedspace % 16; 92228e30874SSatish Balay if (offset) offset = (16 - offset)/4; 92328e30874SSatish Balay tmps = &ssealignedspace[offset]; 92428e30874SSatish Balay tmpx = &ssealignedspace[offset+4]; 92528e30874SSatish Balay PREFETCH_NTA(aa+16*ai[1]); 92628e30874SSatish Balay 92728e30874SSatish Balay ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 92828e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 92928e30874SSatish Balay t = a->solve_work; 93028e30874SSatish Balay 93128e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 93228e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 93328e30874SSatish Balay 93428e30874SSatish Balay /* forward solve the lower triangular */ 93528e30874SSatish Balay idx = 4*(*r++); 93628e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 93728e30874SSatish Balay t[2] = b[2+idx]; t[3] = b[3+idx]; 93828e30874SSatish Balay v = aa + 16*ai[1]; 93928e30874SSatish Balay 94028e30874SSatish Balay for (i=1; i<n; ) { 94128e30874SSatish Balay PREFETCH_NTA(&v[8]); 94228e30874SSatish Balay vi = aj + ai[i]; 94328e30874SSatish Balay nz = diag[i] - ai[i]; 94428e30874SSatish Balay idx = 4*(*r++); 94528e30874SSatish Balay 94628e30874SSatish Balay /* Demote sum from double to float */ 94728e30874SSatish Balay CONVERT_DOUBLE4_FLOAT4(tmps,&b[idx]); 94828e30874SSatish Balay LOAD_PS(tmps,XMM7); 94928e30874SSatish Balay 95028e30874SSatish Balay while (nz--) { 95128e30874SSatish Balay PREFETCH_NTA(&v[16]); 95228e30874SSatish Balay idx = 4*(*vi++); 95328e30874SSatish Balay 95428e30874SSatish Balay /* Demote solution (so far) from double to float */ 95528e30874SSatish Balay CONVERT_DOUBLE4_FLOAT4(tmpx,&x[idx]); 95628e30874SSatish Balay 95728e30874SSatish Balay /* 4x4 Matrix-Vector product with negative accumulation: */ 95828e30874SSatish Balay SSE_INLINE_BEGIN_2(tmpx,v) 95928e30874SSatish Balay SSE_LOAD_PS(SSE_ARG_1,FLOAT_0,XMM6) 96028e30874SSatish Balay 96128e30874SSatish Balay /* First Column */ 96228e30874SSatish Balay SSE_COPY_PS(XMM0,XMM6) 96328e30874SSatish Balay SSE_SHUFFLE(XMM0,XMM0,0x00) 96428e30874SSatish Balay SSE_MULT_PS_M(XMM0,SSE_ARG_2,FLOAT_0) 96528e30874SSatish Balay SSE_SUB_PS(XMM7,XMM0) 96628e30874SSatish Balay 96728e30874SSatish Balay /* Second Column */ 96828e30874SSatish Balay SSE_COPY_PS(XMM1,XMM6) 96928e30874SSatish Balay SSE_SHUFFLE(XMM1,XMM1,0x55) 97028e30874SSatish Balay SSE_MULT_PS_M(XMM1,SSE_ARG_2,FLOAT_4) 97128e30874SSatish Balay SSE_SUB_PS(XMM7,XMM1) 97228e30874SSatish Balay 97328e30874SSatish Balay SSE_PREFETCH_NTA(SSE_ARG_2,FLOAT_24) 97428e30874SSatish Balay 97528e30874SSatish Balay /* Third Column */ 97628e30874SSatish Balay SSE_COPY_PS(XMM2,XMM6) 97728e30874SSatish Balay SSE_SHUFFLE(XMM2,XMM2,0xAA) 97828e30874SSatish Balay SSE_MULT_PS_M(XMM2,SSE_ARG_2,FLOAT_8) 97928e30874SSatish Balay SSE_SUB_PS(XMM7,XMM2) 98028e30874SSatish Balay 98128e30874SSatish Balay /* Fourth Column */ 98228e30874SSatish Balay SSE_COPY_PS(XMM3,XMM6) 98328e30874SSatish Balay SSE_SHUFFLE(XMM3,XMM3,0xFF) 98428e30874SSatish Balay SSE_MULT_PS_M(XMM3,SSE_ARG_2,FLOAT_12) 98528e30874SSatish Balay SSE_SUB_PS(XMM7,XMM3) 98628e30874SSatish Balay SSE_INLINE_END_2 98728e30874SSatish Balay 98828e30874SSatish Balay v += 16; 98928e30874SSatish Balay } 99028e30874SSatish Balay idx = 4*i; 99128e30874SSatish Balay v = aa + 16*ai[++i]; 99228e30874SSatish Balay PREFETCH_NTA(v); 99328e30874SSatish Balay STORE_PS(tmps,XMM7); 99428e30874SSatish Balay 99528e30874SSatish Balay /* Promote result from float to double */ 99628e30874SSatish Balay CONVERT_FLOAT4_DOUBLE4(&t[idx],tmps); 99728e30874SSatish Balay } 99828e30874SSatish Balay /* backward solve the upper triangular */ 99928e30874SSatish Balay idt = 4*(n-1); 100028e30874SSatish Balay ai16 = 16*diag[n-1]; 100128e30874SSatish Balay v = aa + ai16 + 16; 100228e30874SSatish Balay for (i=n-1; i>=0; ) { 100328e30874SSatish Balay PREFETCH_NTA(&v[8]); 100428e30874SSatish Balay vi = aj + diag[i] + 1; 100528e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 100628e30874SSatish Balay 100728e30874SSatish Balay /* Demote accumulator from double to float */ 100828e30874SSatish Balay CONVERT_DOUBLE4_FLOAT4(tmps,&t[idt]); 100928e30874SSatish Balay LOAD_PS(tmps,XMM7); 101028e30874SSatish Balay 101128e30874SSatish Balay while (nz--) { 101228e30874SSatish Balay PREFETCH_NTA(&v[16]); 101328e30874SSatish Balay idx = 4*(*vi++); 101428e30874SSatish Balay 101528e30874SSatish Balay /* Demote solution (so far) from double to float */ 101628e30874SSatish Balay CONVERT_DOUBLE4_FLOAT4(tmpx,&t[idx]); 101728e30874SSatish Balay 101828e30874SSatish Balay /* 4x4 Matrix-Vector Product with negative accumulation: */ 101928e30874SSatish Balay SSE_INLINE_BEGIN_2(tmpx,v) 102028e30874SSatish Balay SSE_LOAD_PS(SSE_ARG_1,FLOAT_0,XMM6) 102128e30874SSatish Balay 102228e30874SSatish Balay /* First Column */ 102328e30874SSatish Balay SSE_COPY_PS(XMM0,XMM6) 102428e30874SSatish Balay SSE_SHUFFLE(XMM0,XMM0,0x00) 102528e30874SSatish Balay SSE_MULT_PS_M(XMM0,SSE_ARG_2,FLOAT_0) 102628e30874SSatish Balay SSE_SUB_PS(XMM7,XMM0) 102728e30874SSatish Balay 102828e30874SSatish Balay /* Second Column */ 102928e30874SSatish Balay SSE_COPY_PS(XMM1,XMM6) 103028e30874SSatish Balay SSE_SHUFFLE(XMM1,XMM1,0x55) 103128e30874SSatish Balay SSE_MULT_PS_M(XMM1,SSE_ARG_2,FLOAT_4) 103228e30874SSatish Balay SSE_SUB_PS(XMM7,XMM1) 103328e30874SSatish Balay 103428e30874SSatish Balay SSE_PREFETCH_NTA(SSE_ARG_2,FLOAT_24) 103528e30874SSatish Balay 103628e30874SSatish Balay /* Third Column */ 103728e30874SSatish Balay SSE_COPY_PS(XMM2,XMM6) 103828e30874SSatish Balay SSE_SHUFFLE(XMM2,XMM2,0xAA) 103928e30874SSatish Balay SSE_MULT_PS_M(XMM2,SSE_ARG_2,FLOAT_8) 104028e30874SSatish Balay SSE_SUB_PS(XMM7,XMM2) 104128e30874SSatish Balay 104228e30874SSatish Balay /* Fourth Column */ 104328e30874SSatish Balay SSE_COPY_PS(XMM3,XMM6) 104428e30874SSatish Balay SSE_SHUFFLE(XMM3,XMM3,0xFF) 104528e30874SSatish Balay SSE_MULT_PS_M(XMM3,SSE_ARG_2,FLOAT_12) 104628e30874SSatish Balay SSE_SUB_PS(XMM7,XMM3) 104728e30874SSatish Balay SSE_INLINE_END_2 104828e30874SSatish Balay v += 16; 104928e30874SSatish Balay } 105028e30874SSatish Balay v = aa + ai16; 105128e30874SSatish Balay ai16 = 16*diag[--i]; 105228e30874SSatish Balay PREFETCH_NTA(aa+ai16+16); 105328e30874SSatish Balay /* 105428e30874SSatish Balay Scale the result by the diagonal 4x4 block, 105528e30874SSatish Balay which was inverted as part of the factorization 105628e30874SSatish Balay */ 105728e30874SSatish Balay SSE_INLINE_BEGIN_3(v,tmps,aa+ai16) 105828e30874SSatish Balay /* First Column */ 105928e30874SSatish Balay SSE_COPY_PS(XMM0,XMM7) 106028e30874SSatish Balay SSE_SHUFFLE(XMM0,XMM0,0x00) 106128e30874SSatish Balay SSE_MULT_PS_M(XMM0,SSE_ARG_1,FLOAT_0) 106228e30874SSatish Balay 106328e30874SSatish Balay /* Second Column */ 106428e30874SSatish Balay SSE_COPY_PS(XMM1,XMM7) 106528e30874SSatish Balay SSE_SHUFFLE(XMM1,XMM1,0x55) 106628e30874SSatish Balay SSE_MULT_PS_M(XMM1,SSE_ARG_1,FLOAT_4) 106728e30874SSatish Balay SSE_ADD_PS(XMM0,XMM1) 106828e30874SSatish Balay 106928e30874SSatish Balay SSE_PREFETCH_NTA(SSE_ARG_3,FLOAT_24) 107028e30874SSatish Balay 107128e30874SSatish Balay /* Third Column */ 107228e30874SSatish Balay SSE_COPY_PS(XMM2,XMM7) 107328e30874SSatish Balay SSE_SHUFFLE(XMM2,XMM2,0xAA) 107428e30874SSatish Balay SSE_MULT_PS_M(XMM2,SSE_ARG_1,FLOAT_8) 107528e30874SSatish Balay SSE_ADD_PS(XMM0,XMM2) 107628e30874SSatish Balay 107728e30874SSatish Balay /* Fourth Column */ 107828e30874SSatish Balay SSE_COPY_PS(XMM3,XMM7) 107928e30874SSatish Balay SSE_SHUFFLE(XMM3,XMM3,0xFF) 108028e30874SSatish Balay SSE_MULT_PS_M(XMM3,SSE_ARG_1,FLOAT_12) 108128e30874SSatish Balay SSE_ADD_PS(XMM0,XMM3) 108228e30874SSatish Balay 108328e30874SSatish Balay SSE_STORE_PS(SSE_ARG_2,FLOAT_0,XMM0) 108428e30874SSatish Balay SSE_INLINE_END_3 108528e30874SSatish Balay 108628e30874SSatish Balay /* Promote solution from float to double */ 108728e30874SSatish Balay CONVERT_FLOAT4_DOUBLE4(&t[idt],tmps); 108828e30874SSatish Balay 108928e30874SSatish Balay /* Apply reordering to t and stream into x. */ 109028e30874SSatish Balay /* This way, x doesn't pollute the cache. */ 109128e30874SSatish Balay /* Be careful with size: 2 doubles = 4 floats! */ 109228e30874SSatish Balay idc = 4*(*c--); 109328e30874SSatish Balay SSE_INLINE_BEGIN_2((float*)&t[idt],(float*)&x[idc]) 109428e30874SSatish Balay /* x[idc] = t[idt]; x[1+idc] = t[1+idc]; */ 109528e30874SSatish Balay SSE_LOAD_PS(SSE_ARG_1,FLOAT_0,XMM0) 109628e30874SSatish Balay SSE_STREAM_PS(SSE_ARG_2,FLOAT_0,XMM0) 109728e30874SSatish Balay /* x[idc+2] = t[idt+2]; x[3+idc] = t[3+idc]; */ 109828e30874SSatish Balay SSE_LOAD_PS(SSE_ARG_1,FLOAT_4,XMM1) 109928e30874SSatish Balay SSE_STREAM_PS(SSE_ARG_2,FLOAT_4,XMM1) 110028e30874SSatish Balay SSE_INLINE_END_2 110128e30874SSatish Balay v = aa + ai16 + 16; 110228e30874SSatish Balay idt -= 4; 110328e30874SSatish Balay } 110428e30874SSatish Balay 110528e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 110628e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 110728e30874SSatish Balay ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 110828e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 110928e30874SSatish Balay ierr = PetscLogFlops(2.0*16*(a->nz) - 4.0*A->cmap->n);CHKERRQ(ierr); 111028e30874SSatish Balay SSE_SCOPE_END; 111128e30874SSatish Balay PetscFunctionReturn(0); 111228e30874SSatish Balay } 111328e30874SSatish Balay 111428e30874SSatish Balay #endif 111528e30874SSatish Balay 111628e30874SSatish Balay #undef __FUNCT__ 111728e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_3_inplace" 111828e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_3_inplace(Mat A,Vec bb,Vec xx) 111928e30874SSatish Balay { 112028e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 112128e30874SSatish Balay IS iscol=a->col,isrow=a->row; 112228e30874SSatish Balay PetscErrorCode ierr; 112328e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 112428e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 112528e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 112628e30874SSatish Balay const MatScalar *aa=a->a,*v; 112728e30874SSatish Balay PetscScalar *x,s1,s2,s3,x1,x2,x3,*t; 112828e30874SSatish Balay const PetscScalar *b; 112928e30874SSatish Balay 113028e30874SSatish Balay PetscFunctionBegin; 113128e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 113228e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 113328e30874SSatish Balay t = a->solve_work; 113428e30874SSatish Balay 113528e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 113628e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 113728e30874SSatish Balay 113828e30874SSatish Balay /* forward solve the lower triangular */ 113928e30874SSatish Balay idx = 3*(*r++); 114028e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx]; 114128e30874SSatish Balay for (i=1; i<n; i++) { 114228e30874SSatish Balay v = aa + 9*ai[i]; 114328e30874SSatish Balay vi = aj + ai[i]; 114428e30874SSatish Balay nz = diag[i] - ai[i]; 114528e30874SSatish Balay idx = 3*(*r++); 114628e30874SSatish Balay s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx]; 114728e30874SSatish Balay while (nz--) { 114828e30874SSatish Balay idx = 3*(*vi++); 114928e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 115028e30874SSatish Balay s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 115128e30874SSatish Balay s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 115228e30874SSatish Balay s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 115328e30874SSatish Balay v += 9; 115428e30874SSatish Balay } 115528e30874SSatish Balay idx = 3*i; 115628e30874SSatish Balay t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3; 115728e30874SSatish Balay } 115828e30874SSatish Balay /* backward solve the upper triangular */ 115928e30874SSatish Balay for (i=n-1; i>=0; i--) { 116028e30874SSatish Balay v = aa + 9*diag[i] + 9; 116128e30874SSatish Balay vi = aj + diag[i] + 1; 116228e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 116328e30874SSatish Balay idt = 3*i; 116428e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt]; 116528e30874SSatish Balay while (nz--) { 116628e30874SSatish Balay idx = 3*(*vi++); 116728e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 116828e30874SSatish Balay s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 116928e30874SSatish Balay s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 117028e30874SSatish Balay s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 117128e30874SSatish Balay v += 9; 117228e30874SSatish Balay } 117328e30874SSatish Balay idc = 3*(*c--); 117428e30874SSatish Balay v = aa + 9*diag[i]; 117528e30874SSatish Balay x[idc] = t[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3; 117628e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3; 117728e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3; 117828e30874SSatish Balay } 117928e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 118028e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 118128e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 118228e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 118328e30874SSatish Balay ierr = PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);CHKERRQ(ierr); 118428e30874SSatish Balay PetscFunctionReturn(0); 118528e30874SSatish Balay } 118628e30874SSatish Balay 118728e30874SSatish Balay #undef __FUNCT__ 118828e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_3" 118928e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 119028e30874SSatish Balay { 119128e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 119228e30874SSatish Balay IS iscol=a->col,isrow=a->row; 119328e30874SSatish Balay PetscErrorCode ierr; 119428e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 119528e30874SSatish Balay PetscInt i,nz,idx,idt,idc,m; 119628e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 119728e30874SSatish Balay const MatScalar *aa=a->a,*v; 119828e30874SSatish Balay PetscScalar *x,s1,s2,s3,x1,x2,x3,*t; 119928e30874SSatish Balay const PetscScalar *b; 120028e30874SSatish Balay 120128e30874SSatish Balay PetscFunctionBegin; 120228e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 120328e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 120428e30874SSatish Balay t = a->solve_work; 120528e30874SSatish Balay 120628e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 120728e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 120828e30874SSatish Balay 120928e30874SSatish Balay /* forward solve the lower triangular */ 121028e30874SSatish Balay idx = 3*r[0]; 121128e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx]; 121228e30874SSatish Balay for (i=1; i<n; i++) { 121328e30874SSatish Balay v = aa + 9*ai[i]; 121428e30874SSatish Balay vi = aj + ai[i]; 121528e30874SSatish Balay nz = ai[i+1] - ai[i]; 121628e30874SSatish Balay idx = 3*r[i]; 121728e30874SSatish Balay s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx]; 121828e30874SSatish Balay for (m=0; m<nz; m++) { 121928e30874SSatish Balay idx = 3*vi[m]; 122028e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 122128e30874SSatish Balay s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 122228e30874SSatish Balay s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 122328e30874SSatish Balay s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 122428e30874SSatish Balay v += 9; 122528e30874SSatish Balay } 122628e30874SSatish Balay idx = 3*i; 122728e30874SSatish Balay t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3; 122828e30874SSatish Balay } 122928e30874SSatish Balay /* backward solve the upper triangular */ 123028e30874SSatish Balay for (i=n-1; i>=0; i--) { 123128e30874SSatish Balay v = aa + 9*(adiag[i+1]+1); 123228e30874SSatish Balay vi = aj + adiag[i+1]+1; 123328e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 123428e30874SSatish Balay idt = 3*i; 123528e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt]; 123628e30874SSatish Balay for (m=0; m<nz; m++) { 123728e30874SSatish Balay idx = 3*vi[m]; 123828e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 123928e30874SSatish Balay s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 124028e30874SSatish Balay s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 124128e30874SSatish Balay s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 124228e30874SSatish Balay v += 9; 124328e30874SSatish Balay } 124428e30874SSatish Balay idc = 3*c[i]; 124528e30874SSatish Balay x[idc] = t[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3; 124628e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3; 124728e30874SSatish Balay x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3; 124828e30874SSatish Balay } 124928e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 125028e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 125128e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 125228e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 125328e30874SSatish Balay ierr = PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);CHKERRQ(ierr); 125428e30874SSatish Balay PetscFunctionReturn(0); 125528e30874SSatish Balay } 125628e30874SSatish Balay 125728e30874SSatish Balay #undef __FUNCT__ 125828e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_2_inplace" 125928e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_2_inplace(Mat A,Vec bb,Vec xx) 126028e30874SSatish Balay { 126128e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 126228e30874SSatish Balay IS iscol=a->col,isrow=a->row; 126328e30874SSatish Balay PetscErrorCode ierr; 126428e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 126528e30874SSatish Balay PetscInt i,nz,idx,idt,idc; 126628e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 126728e30874SSatish Balay const MatScalar *aa=a->a,*v; 126828e30874SSatish Balay PetscScalar *x,s1,s2,x1,x2,*t; 126928e30874SSatish Balay const PetscScalar *b; 127028e30874SSatish Balay 127128e30874SSatish Balay PetscFunctionBegin; 127228e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 127328e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 127428e30874SSatish Balay t = a->solve_work; 127528e30874SSatish Balay 127628e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 127728e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 127828e30874SSatish Balay 127928e30874SSatish Balay /* forward solve the lower triangular */ 128028e30874SSatish Balay idx = 2*(*r++); 128128e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 128228e30874SSatish Balay for (i=1; i<n; i++) { 128328e30874SSatish Balay v = aa + 4*ai[i]; 128428e30874SSatish Balay vi = aj + ai[i]; 128528e30874SSatish Balay nz = diag[i] - ai[i]; 128628e30874SSatish Balay idx = 2*(*r++); 128728e30874SSatish Balay s1 = b[idx]; s2 = b[1+idx]; 128828e30874SSatish Balay while (nz--) { 128928e30874SSatish Balay idx = 2*(*vi++); 129028e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 129128e30874SSatish Balay s1 -= v[0]*x1 + v[2]*x2; 129228e30874SSatish Balay s2 -= v[1]*x1 + v[3]*x2; 129328e30874SSatish Balay v += 4; 129428e30874SSatish Balay } 129528e30874SSatish Balay idx = 2*i; 129628e30874SSatish Balay t[idx] = s1; t[1+idx] = s2; 129728e30874SSatish Balay } 129828e30874SSatish Balay /* backward solve the upper triangular */ 129928e30874SSatish Balay for (i=n-1; i>=0; i--) { 130028e30874SSatish Balay v = aa + 4*diag[i] + 4; 130128e30874SSatish Balay vi = aj + diag[i] + 1; 130228e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 130328e30874SSatish Balay idt = 2*i; 130428e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 130528e30874SSatish Balay while (nz--) { 130628e30874SSatish Balay idx = 2*(*vi++); 130728e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 130828e30874SSatish Balay s1 -= v[0]*x1 + v[2]*x2; 130928e30874SSatish Balay s2 -= v[1]*x1 + v[3]*x2; 131028e30874SSatish Balay v += 4; 131128e30874SSatish Balay } 131228e30874SSatish Balay idc = 2*(*c--); 131328e30874SSatish Balay v = aa + 4*diag[i]; 131428e30874SSatish Balay x[idc] = t[idt] = v[0]*s1 + v[2]*s2; 131528e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2; 131628e30874SSatish Balay } 131728e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 131828e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 131928e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 132028e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 132128e30874SSatish Balay ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr); 132228e30874SSatish Balay PetscFunctionReturn(0); 132328e30874SSatish Balay } 132428e30874SSatish Balay 132528e30874SSatish Balay #undef __FUNCT__ 132628e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_2" 132728e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 132828e30874SSatish Balay { 132928e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 133028e30874SSatish Balay IS iscol=a->col,isrow=a->row; 133128e30874SSatish Balay PetscErrorCode ierr; 133228e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag; 133328e30874SSatish Balay PetscInt i,nz,idx,jdx,idt,idc,m; 133428e30874SSatish Balay const PetscInt *r,*c,*rout,*cout; 133528e30874SSatish Balay const MatScalar *aa=a->a,*v; 133628e30874SSatish Balay PetscScalar *x,s1,s2,x1,x2,*t; 133728e30874SSatish Balay const PetscScalar *b; 133828e30874SSatish Balay 133928e30874SSatish Balay PetscFunctionBegin; 134028e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 134128e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 134228e30874SSatish Balay t = a->solve_work; 134328e30874SSatish Balay 134428e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 134528e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 134628e30874SSatish Balay 134728e30874SSatish Balay /* forward solve the lower triangular */ 134828e30874SSatish Balay idx = 2*r[0]; 134928e30874SSatish Balay t[0] = b[idx]; t[1] = b[1+idx]; 135028e30874SSatish Balay for (i=1; i<n; i++) { 135128e30874SSatish Balay v = aa + 4*ai[i]; 135228e30874SSatish Balay vi = aj + ai[i]; 135328e30874SSatish Balay nz = ai[i+1] - ai[i]; 135428e30874SSatish Balay idx = 2*r[i]; 135528e30874SSatish Balay s1 = b[idx]; s2 = b[1+idx]; 135628e30874SSatish Balay for (m=0; m<nz; m++) { 135728e30874SSatish Balay jdx = 2*vi[m]; 135828e30874SSatish Balay x1 = t[jdx]; x2 = t[1+jdx]; 135928e30874SSatish Balay s1 -= v[0]*x1 + v[2]*x2; 136028e30874SSatish Balay s2 -= v[1]*x1 + v[3]*x2; 136128e30874SSatish Balay v += 4; 136228e30874SSatish Balay } 136328e30874SSatish Balay idx = 2*i; 136428e30874SSatish Balay t[idx] = s1; t[1+idx] = s2; 136528e30874SSatish Balay } 136628e30874SSatish Balay /* backward solve the upper triangular */ 136728e30874SSatish Balay for (i=n-1; i>=0; i--) { 136828e30874SSatish Balay v = aa + 4*(adiag[i+1]+1); 136928e30874SSatish Balay vi = aj + adiag[i+1]+1; 137028e30874SSatish Balay nz = adiag[i] - adiag[i+1] - 1; 137128e30874SSatish Balay idt = 2*i; 137228e30874SSatish Balay s1 = t[idt]; s2 = t[1+idt]; 137328e30874SSatish Balay for (m=0; m<nz; m++) { 137428e30874SSatish Balay idx = 2*vi[m]; 137528e30874SSatish Balay x1 = t[idx]; x2 = t[1+idx]; 137628e30874SSatish Balay s1 -= v[0]*x1 + v[2]*x2; 137728e30874SSatish Balay s2 -= v[1]*x1 + v[3]*x2; 137828e30874SSatish Balay v += 4; 137928e30874SSatish Balay } 138028e30874SSatish Balay idc = 2*c[i]; 138128e30874SSatish Balay x[idc] = t[idt] = v[0]*s1 + v[2]*s2; 138228e30874SSatish Balay x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2; 138328e30874SSatish Balay } 138428e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 138528e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 138628e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 138728e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 138828e30874SSatish Balay ierr = PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);CHKERRQ(ierr); 138928e30874SSatish Balay PetscFunctionReturn(0); 139028e30874SSatish Balay } 139128e30874SSatish Balay 139228e30874SSatish Balay #undef __FUNCT__ 139328e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_1_inplace" 139428e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_1_inplace(Mat A,Vec bb,Vec xx) 139528e30874SSatish Balay { 139628e30874SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 139728e30874SSatish Balay IS iscol=a->col,isrow=a->row; 139828e30874SSatish Balay PetscErrorCode ierr; 139928e30874SSatish Balay const PetscInt n=a->mbs,*vi,*ai=a->i,*aj=a->j; 140028e30874SSatish Balay PetscInt i,nz; 140128e30874SSatish Balay const PetscInt *r,*c,*diag = a->diag,*rout,*cout; 140228e30874SSatish Balay const MatScalar *aa=a->a,*v; 140328e30874SSatish Balay PetscScalar *x,s1,*t; 140428e30874SSatish Balay const PetscScalar *b; 140528e30874SSatish Balay 140628e30874SSatish Balay PetscFunctionBegin; 140728e30874SSatish Balay if (!n) PetscFunctionReturn(0); 140828e30874SSatish Balay 140928e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 141028e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141128e30874SSatish Balay t = a->solve_work; 141228e30874SSatish Balay 141328e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 141428e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 141528e30874SSatish Balay 141628e30874SSatish Balay /* forward solve the lower triangular */ 141728e30874SSatish Balay t[0] = b[*r++]; 141828e30874SSatish Balay for (i=1; i<n; i++) { 141928e30874SSatish Balay v = aa + ai[i]; 142028e30874SSatish Balay vi = aj + ai[i]; 142128e30874SSatish Balay nz = diag[i] - ai[i]; 142228e30874SSatish Balay s1 = b[*r++]; 142328e30874SSatish Balay while (nz--) { 142428e30874SSatish Balay s1 -= (*v++)*t[*vi++]; 142528e30874SSatish Balay } 142628e30874SSatish Balay t[i] = s1; 142728e30874SSatish Balay } 142828e30874SSatish Balay /* backward solve the upper triangular */ 142928e30874SSatish Balay for (i=n-1; i>=0; i--) { 143028e30874SSatish Balay v = aa + diag[i] + 1; 143128e30874SSatish Balay vi = aj + diag[i] + 1; 143228e30874SSatish Balay nz = ai[i+1] - diag[i] - 1; 143328e30874SSatish Balay s1 = t[i]; 143428e30874SSatish Balay while (nz--) { 143528e30874SSatish Balay s1 -= (*v++)*t[*vi++]; 143628e30874SSatish Balay } 143728e30874SSatish Balay x[*c--] = t[i] = aa[diag[i]]*s1; 143828e30874SSatish Balay } 143928e30874SSatish Balay 144028e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 144128e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 144228e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 144328e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 144428e30874SSatish Balay ierr = PetscLogFlops(2.0*1*(a->nz) - A->cmap->n);CHKERRQ(ierr); 144528e30874SSatish Balay PetscFunctionReturn(0); 144628e30874SSatish Balay } 144728e30874SSatish Balay 144828e30874SSatish Balay #undef __FUNCT__ 144928e30874SSatish Balay #define __FUNCT__ "MatSolve_SeqBAIJ_1" 145028e30874SSatish Balay PetscErrorCode MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 145128e30874SSatish Balay { 145228e30874SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 145328e30874SSatish Balay IS iscol = a->col,isrow = a->row; 145428e30874SSatish Balay PetscErrorCode ierr; 145528e30874SSatish Balay PetscInt i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 145628e30874SSatish Balay const PetscInt *rout,*cout,*r,*c; 145728e30874SSatish Balay PetscScalar *x,*tmp,sum; 145828e30874SSatish Balay const PetscScalar *b; 145928e30874SSatish Balay const MatScalar *aa = a->a,*v; 146028e30874SSatish Balay 146128e30874SSatish Balay PetscFunctionBegin; 146228e30874SSatish Balay if (!n) PetscFunctionReturn(0); 146328e30874SSatish Balay 146428e30874SSatish Balay ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 146528e30874SSatish Balay ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 146628e30874SSatish Balay tmp = a->solve_work; 146728e30874SSatish Balay 146828e30874SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 146928e30874SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 147028e30874SSatish Balay 147128e30874SSatish Balay /* forward solve the lower triangular */ 147228e30874SSatish Balay tmp[0] = b[r[0]]; 147328e30874SSatish Balay v = aa; 147428e30874SSatish Balay vi = aj; 147528e30874SSatish Balay for (i=1; i<n; i++) { 147628e30874SSatish Balay nz = ai[i+1] - ai[i]; 147728e30874SSatish Balay sum = b[r[i]]; 147828e30874SSatish Balay PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 147928e30874SSatish Balay tmp[i] = sum; 148028e30874SSatish Balay v += nz; vi += nz; 148128e30874SSatish Balay } 148228e30874SSatish Balay 148328e30874SSatish Balay /* backward solve the upper triangular */ 148428e30874SSatish Balay for (i=n-1; i>=0; i--) { 148528e30874SSatish Balay v = aa + adiag[i+1]+1; 148628e30874SSatish Balay vi = aj + adiag[i+1]+1; 148728e30874SSatish Balay nz = adiag[i]-adiag[i+1]-1; 148828e30874SSatish Balay sum = tmp[i]; 148928e30874SSatish Balay PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 149028e30874SSatish Balay x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 149128e30874SSatish Balay } 149228e30874SSatish Balay 149328e30874SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 149428e30874SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 149528e30874SSatish Balay ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 149628e30874SSatish Balay ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 149728e30874SSatish Balay ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 149828e30874SSatish Balay PetscFunctionReturn(0); 149928e30874SSatish Balay } 150028e30874SSatish Balay 1501