1*f1af5d2fSBarry Smith /*$Id: baijfact2.c,v 1.30 1999/10/24 14:02:28 bsmith Exp bsmith $*/ 24e2b4712SSatish Balay /* 34e2b4712SSatish Balay Factorization code for BAIJ format. 44e2b4712SSatish Balay */ 54e2b4712SSatish Balay 64e2b4712SSatish Balay #include "src/mat/impls/baij/seq/baij.h" 74e2b4712SSatish Balay #include "src/vec/vecimpl.h" 84e2b4712SSatish Balay #include "src/inline/ilu.h" 974c49faeSBarry Smith #include "src/inline/dot.h" 104e2b4712SSatish Balay 11*f1af5d2fSBarry Smith #undef __FUNC__ 12*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_1_NaturalOrdering" 13*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 14*f1af5d2fSBarry Smith { 15*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 16*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz; 17*f1af5d2fSBarry Smith int *diag = a->diag; 18*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 19*f1af5d2fSBarry Smith Scalar s1,*x,*b; 20*f1af5d2fSBarry Smith 21*f1af5d2fSBarry Smith PetscFunctionBegin; 22*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 23*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 24*f1af5d2fSBarry Smith 25*f1af5d2fSBarry Smith /* forward solve the U^T */ 26*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 27*f1af5d2fSBarry Smith 28*f1af5d2fSBarry Smith v = aa + diag[i]; 29*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 30*f1af5d2fSBarry Smith s1 = (*v++)*b[i]; 31*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 32*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 33*f1af5d2fSBarry Smith while (nz--) { 34*f1af5d2fSBarry Smith x[*vi++] -= (*v++)*s1; 35*f1af5d2fSBarry Smith } 36*f1af5d2fSBarry Smith x[i] = s1; 37*f1af5d2fSBarry Smith } 38*f1af5d2fSBarry Smith /* backward solve the L^T */ 39*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 40*f1af5d2fSBarry Smith v = aa + diag[i] - 1; 41*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 42*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 43*f1af5d2fSBarry Smith s1 = x[i]; 44*f1af5d2fSBarry Smith while (nz--) { 45*f1af5d2fSBarry Smith x[*vi--] -= (*v--)*s1; 46*f1af5d2fSBarry Smith } 47*f1af5d2fSBarry Smith } 48*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 49*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 50*f1af5d2fSBarry Smith PLogFlops(2*(a->nz) - a->n); 51*f1af5d2fSBarry Smith PetscFunctionReturn(0); 52*f1af5d2fSBarry Smith } 53*f1af5d2fSBarry Smith 54*f1af5d2fSBarry Smith #undef __FUNC__ 55*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_2_NaturalOrdering" 56*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 57*f1af5d2fSBarry Smith { 58*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 59*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 60*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 61*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 62*f1af5d2fSBarry Smith Scalar s1,s2,x1,x2; 63*f1af5d2fSBarry Smith Scalar *x,*b; 64*f1af5d2fSBarry Smith 65*f1af5d2fSBarry Smith PetscFunctionBegin; 66*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 67*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 68*f1af5d2fSBarry Smith 69*f1af5d2fSBarry Smith /* forward solve the U^T */ 70*f1af5d2fSBarry Smith idx = 0; 71*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 72*f1af5d2fSBarry Smith 73*f1af5d2fSBarry Smith v = aa + 4*diag[i]; 74*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 75*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; 76*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2; 77*f1af5d2fSBarry Smith s2 = v[2]*x1 + v[3]*x2; 78*f1af5d2fSBarry Smith v += 4; 79*f1af5d2fSBarry Smith 80*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 81*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 82*f1af5d2fSBarry Smith while (nz--) { 83*f1af5d2fSBarry Smith oidx = 2*(*vi++); 84*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2; 85*f1af5d2fSBarry Smith x[oidx+1] -= v[2]*s1 + v[3]*s2; 86*f1af5d2fSBarry Smith v += 4; 87*f1af5d2fSBarry Smith } 88*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; 89*f1af5d2fSBarry Smith idx += 2; 90*f1af5d2fSBarry Smith } 91*f1af5d2fSBarry Smith /* backward solve the L^T */ 92*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 93*f1af5d2fSBarry Smith v = aa + 4*diag[i] - 4; 94*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 95*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 96*f1af5d2fSBarry Smith idt = 2*i; 97*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 98*f1af5d2fSBarry Smith while (nz--) { 99*f1af5d2fSBarry Smith idx = 2*(*vi--); 100*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2; 101*f1af5d2fSBarry Smith x[idx+1] -= v[2]*s1 + v[3]*s2; 102*f1af5d2fSBarry Smith v -= 4; 103*f1af5d2fSBarry Smith } 104*f1af5d2fSBarry Smith } 105*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 106*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 107*f1af5d2fSBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 108*f1af5d2fSBarry Smith PetscFunctionReturn(0); 109*f1af5d2fSBarry Smith } 110*f1af5d2fSBarry Smith 111*f1af5d2fSBarry Smith #undef __FUNC__ 112*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_3_NaturalOrdering" 113*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 114*f1af5d2fSBarry Smith { 115*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 116*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 117*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 118*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 119*f1af5d2fSBarry Smith Scalar s1,s2,s3,x1,x2,x3; 120*f1af5d2fSBarry Smith Scalar *x,*b; 121*f1af5d2fSBarry Smith 122*f1af5d2fSBarry Smith PetscFunctionBegin; 123*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 124*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 125*f1af5d2fSBarry Smith 126*f1af5d2fSBarry Smith /* forward solve the U^T */ 127*f1af5d2fSBarry Smith idx = 0; 128*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 129*f1af5d2fSBarry Smith 130*f1af5d2fSBarry Smith v = aa + 9*diag[i]; 131*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 132*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; 133*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3; 134*f1af5d2fSBarry Smith s2 = v[3]*x1 + v[4]*x2 + v[5]*x3; 135*f1af5d2fSBarry Smith s3 = v[6]*x1 + v[7]*x2 + v[8]*x3; 136*f1af5d2fSBarry Smith v += 9; 137*f1af5d2fSBarry Smith 138*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 139*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 140*f1af5d2fSBarry Smith while (nz--) { 141*f1af5d2fSBarry Smith oidx = 3*(*vi++); 142*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3; 143*f1af5d2fSBarry Smith x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3; 144*f1af5d2fSBarry Smith x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3; 145*f1af5d2fSBarry Smith v += 9; 146*f1af5d2fSBarry Smith } 147*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3; 148*f1af5d2fSBarry Smith idx += 3; 149*f1af5d2fSBarry Smith } 150*f1af5d2fSBarry Smith /* backward solve the L^T */ 151*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 152*f1af5d2fSBarry Smith v = aa + 9*diag[i] - 9; 153*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 154*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 155*f1af5d2fSBarry Smith idt = 3*i; 156*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt]; 157*f1af5d2fSBarry Smith while (nz--) { 158*f1af5d2fSBarry Smith idx = 3*(*vi--); 159*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3; 160*f1af5d2fSBarry Smith x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3; 161*f1af5d2fSBarry Smith x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3; 162*f1af5d2fSBarry Smith v -= 9; 163*f1af5d2fSBarry Smith } 164*f1af5d2fSBarry Smith } 165*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 166*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 167*f1af5d2fSBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 168*f1af5d2fSBarry Smith PetscFunctionReturn(0); 169*f1af5d2fSBarry Smith } 170*f1af5d2fSBarry Smith 171*f1af5d2fSBarry Smith #undef __FUNC__ 172*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_4_NaturalOrdering" 173*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 174*f1af5d2fSBarry Smith { 175*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 176*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 177*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 178*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 179*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,x1,x2,x3,x4; 180*f1af5d2fSBarry Smith Scalar *x,*b; 181*f1af5d2fSBarry Smith 182*f1af5d2fSBarry Smith PetscFunctionBegin; 183*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 184*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 185*f1af5d2fSBarry Smith 186*f1af5d2fSBarry Smith /* forward solve the U^T */ 187*f1af5d2fSBarry Smith idx = 0; 188*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 189*f1af5d2fSBarry Smith 190*f1af5d2fSBarry Smith v = aa + 16*diag[i]; 191*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 192*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; 193*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 194*f1af5d2fSBarry Smith s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 195*f1af5d2fSBarry Smith s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 196*f1af5d2fSBarry Smith s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 197*f1af5d2fSBarry Smith v += 16; 198*f1af5d2fSBarry Smith 199*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 200*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 201*f1af5d2fSBarry Smith while (nz--) { 202*f1af5d2fSBarry Smith oidx = 4*(*vi++); 203*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4; 204*f1af5d2fSBarry Smith x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4; 205*f1af5d2fSBarry Smith x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4; 206*f1af5d2fSBarry Smith x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4; 207*f1af5d2fSBarry Smith v += 16; 208*f1af5d2fSBarry Smith } 209*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; 210*f1af5d2fSBarry Smith idx += 4; 211*f1af5d2fSBarry Smith } 212*f1af5d2fSBarry Smith /* backward solve the L^T */ 213*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 214*f1af5d2fSBarry Smith v = aa + 16*diag[i] - 16; 215*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 216*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 217*f1af5d2fSBarry Smith idt = 4*i; 218*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; 219*f1af5d2fSBarry Smith while (nz--) { 220*f1af5d2fSBarry Smith idx = 4*(*vi--); 221*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4; 222*f1af5d2fSBarry Smith x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4; 223*f1af5d2fSBarry Smith x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4; 224*f1af5d2fSBarry Smith x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4; 225*f1af5d2fSBarry Smith v -= 16; 226*f1af5d2fSBarry Smith } 227*f1af5d2fSBarry Smith } 228*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 229*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 230*f1af5d2fSBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 231*f1af5d2fSBarry Smith PetscFunctionReturn(0); 232*f1af5d2fSBarry Smith } 233*f1af5d2fSBarry Smith 234*f1af5d2fSBarry Smith #undef __FUNC__ 235*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_5_NaturalOrdering" 236*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 237*f1af5d2fSBarry Smith { 238*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 239*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 240*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 241*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 242*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5; 243*f1af5d2fSBarry Smith Scalar *x,*b; 244*f1af5d2fSBarry Smith 245*f1af5d2fSBarry Smith PetscFunctionBegin; 246*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 247*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 248*f1af5d2fSBarry Smith 249*f1af5d2fSBarry Smith /* forward solve the U^T */ 250*f1af5d2fSBarry Smith idx = 0; 251*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 252*f1af5d2fSBarry Smith 253*f1af5d2fSBarry Smith v = aa + 25*diag[i]; 254*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 255*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx]; 256*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 257*f1af5d2fSBarry Smith s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 258*f1af5d2fSBarry Smith s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 259*f1af5d2fSBarry Smith s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 260*f1af5d2fSBarry Smith s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 261*f1af5d2fSBarry Smith v += 25; 262*f1af5d2fSBarry Smith 263*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 264*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 265*f1af5d2fSBarry Smith while (nz--) { 266*f1af5d2fSBarry Smith oidx = 5*(*vi++); 267*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5; 268*f1af5d2fSBarry Smith x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5; 269*f1af5d2fSBarry Smith x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5; 270*f1af5d2fSBarry Smith x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5; 271*f1af5d2fSBarry Smith x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5; 272*f1af5d2fSBarry Smith v += 25; 273*f1af5d2fSBarry Smith } 274*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5; 275*f1af5d2fSBarry Smith idx += 5; 276*f1af5d2fSBarry Smith } 277*f1af5d2fSBarry Smith /* backward solve the L^T */ 278*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 279*f1af5d2fSBarry Smith v = aa + 25*diag[i] - 25; 280*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 281*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 282*f1af5d2fSBarry Smith idt = 5*i; 283*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 284*f1af5d2fSBarry Smith while (nz--) { 285*f1af5d2fSBarry Smith idx = 5*(*vi--); 286*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5; 287*f1af5d2fSBarry Smith x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5; 288*f1af5d2fSBarry Smith x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5; 289*f1af5d2fSBarry Smith x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5; 290*f1af5d2fSBarry Smith x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5; 291*f1af5d2fSBarry Smith v -= 25; 292*f1af5d2fSBarry Smith } 293*f1af5d2fSBarry Smith } 294*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 295*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 296*f1af5d2fSBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 297*f1af5d2fSBarry Smith PetscFunctionReturn(0); 298*f1af5d2fSBarry Smith } 299*f1af5d2fSBarry Smith 300*f1af5d2fSBarry Smith #undef __FUNC__ 301*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_6_NaturalOrdering" 302*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 303*f1af5d2fSBarry Smith { 304*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 305*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 306*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 307*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 308*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6; 309*f1af5d2fSBarry Smith Scalar *x,*b; 310*f1af5d2fSBarry Smith 311*f1af5d2fSBarry Smith PetscFunctionBegin; 312*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 313*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 314*f1af5d2fSBarry Smith 315*f1af5d2fSBarry Smith /* forward solve the U^T */ 316*f1af5d2fSBarry Smith idx = 0; 317*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 318*f1af5d2fSBarry Smith 319*f1af5d2fSBarry Smith v = aa + 36*diag[i]; 320*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 321*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx]; 322*f1af5d2fSBarry Smith x6 = b[5+idx]; 323*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6; 324*f1af5d2fSBarry Smith s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6; 325*f1af5d2fSBarry Smith s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6; 326*f1af5d2fSBarry Smith s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6; 327*f1af5d2fSBarry Smith s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6; 328*f1af5d2fSBarry Smith s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6; 329*f1af5d2fSBarry Smith v += 36; 330*f1af5d2fSBarry Smith 331*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 332*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 333*f1af5d2fSBarry Smith while (nz--) { 334*f1af5d2fSBarry Smith oidx = 6*(*vi++); 335*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6; 336*f1af5d2fSBarry Smith x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6; 337*f1af5d2fSBarry Smith x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6; 338*f1af5d2fSBarry Smith x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6; 339*f1af5d2fSBarry Smith x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6; 340*f1af5d2fSBarry Smith x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6; 341*f1af5d2fSBarry Smith v += 36; 342*f1af5d2fSBarry Smith } 343*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5; 344*f1af5d2fSBarry Smith x[5+idx] = s6; 345*f1af5d2fSBarry Smith idx += 6; 346*f1af5d2fSBarry Smith } 347*f1af5d2fSBarry Smith /* backward solve the L^T */ 348*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 349*f1af5d2fSBarry Smith v = aa + 36*diag[i] - 36; 350*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 351*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 352*f1af5d2fSBarry Smith idt = 6*i; 353*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 354*f1af5d2fSBarry Smith s6 = x[5+idt]; 355*f1af5d2fSBarry Smith while (nz--) { 356*f1af5d2fSBarry Smith idx = 6*(*vi--); 357*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6; 358*f1af5d2fSBarry Smith x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6; 359*f1af5d2fSBarry Smith x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6; 360*f1af5d2fSBarry Smith x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6; 361*f1af5d2fSBarry Smith x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6; 362*f1af5d2fSBarry Smith x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6; 363*f1af5d2fSBarry Smith v -= 36; 364*f1af5d2fSBarry Smith } 365*f1af5d2fSBarry Smith } 366*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 367*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 368*f1af5d2fSBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 369*f1af5d2fSBarry Smith PetscFunctionReturn(0); 370*f1af5d2fSBarry Smith } 371*f1af5d2fSBarry Smith 372*f1af5d2fSBarry Smith #undef __FUNC__ 373*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_7_NaturalOrdering" 374*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 375*f1af5d2fSBarry Smith { 376*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 377*f1af5d2fSBarry Smith int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 378*f1af5d2fSBarry Smith int *diag = a->diag,oidx; 379*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 380*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7; 381*f1af5d2fSBarry Smith Scalar *x,*b; 382*f1af5d2fSBarry Smith 383*f1af5d2fSBarry Smith PetscFunctionBegin; 384*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 385*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 386*f1af5d2fSBarry Smith 387*f1af5d2fSBarry Smith /* forward solve the U^T */ 388*f1af5d2fSBarry Smith idx = 0; 389*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 390*f1af5d2fSBarry Smith 391*f1af5d2fSBarry Smith v = aa + 49*diag[i]; 392*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 393*f1af5d2fSBarry Smith x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx]; 394*f1af5d2fSBarry Smith x6 = b[5+idx]; x7 = b[6+idx]; 395*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7; 396*f1af5d2fSBarry Smith s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7; 397*f1af5d2fSBarry Smith s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7; 398*f1af5d2fSBarry Smith s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7; 399*f1af5d2fSBarry Smith s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7; 400*f1af5d2fSBarry Smith s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7; 401*f1af5d2fSBarry Smith s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7; 402*f1af5d2fSBarry Smith v += 49; 403*f1af5d2fSBarry Smith 404*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 405*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 406*f1af5d2fSBarry Smith while (nz--) { 407*f1af5d2fSBarry Smith oidx = 7*(*vi++); 408*f1af5d2fSBarry Smith x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7; 409*f1af5d2fSBarry Smith x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7; 410*f1af5d2fSBarry Smith x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7; 411*f1af5d2fSBarry Smith x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7; 412*f1af5d2fSBarry Smith x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7; 413*f1af5d2fSBarry Smith x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7; 414*f1af5d2fSBarry Smith x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7; 415*f1af5d2fSBarry Smith v += 49; 416*f1af5d2fSBarry Smith } 417*f1af5d2fSBarry Smith x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5; 418*f1af5d2fSBarry Smith x[5+idx] = s6;x[6+idx] = s7; 419*f1af5d2fSBarry Smith idx += 7; 420*f1af5d2fSBarry Smith } 421*f1af5d2fSBarry Smith /* backward solve the L^T */ 422*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 423*f1af5d2fSBarry Smith v = aa + 49*diag[i] - 49; 424*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 425*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 426*f1af5d2fSBarry Smith idt = 7*i; 427*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 428*f1af5d2fSBarry Smith s6 = x[5+idt];s7 = x[6+idt]; 429*f1af5d2fSBarry Smith while (nz--) { 430*f1af5d2fSBarry Smith idx = 7*(*vi--); 431*f1af5d2fSBarry Smith x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7; 432*f1af5d2fSBarry Smith x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7; 433*f1af5d2fSBarry Smith x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7; 434*f1af5d2fSBarry Smith x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7; 435*f1af5d2fSBarry Smith x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7; 436*f1af5d2fSBarry Smith x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7; 437*f1af5d2fSBarry Smith x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7; 438*f1af5d2fSBarry Smith v -= 49; 439*f1af5d2fSBarry Smith } 440*f1af5d2fSBarry Smith } 441*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 442*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 443*f1af5d2fSBarry Smith PLogFlops(2*49*(a->nz) - 7*a->n); 444*f1af5d2fSBarry Smith PetscFunctionReturn(0); 445*f1af5d2fSBarry Smith } 446*f1af5d2fSBarry Smith 447*f1af5d2fSBarry Smith /*---------------------------------------------------------------------------------------------*/ 448*f1af5d2fSBarry Smith #undef __FUNC__ 449*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_1" 450*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 451*f1af5d2fSBarry Smith { 452*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 453*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 454*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 455*f1af5d2fSBarry Smith int *diag = a->diag; 456*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 457*f1af5d2fSBarry Smith Scalar s1,*x,*b,*t; 458*f1af5d2fSBarry Smith 459*f1af5d2fSBarry Smith PetscFunctionBegin; 460*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 461*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 462*f1af5d2fSBarry Smith t = a->solve_work; 463*f1af5d2fSBarry Smith 464*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 465*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 466*f1af5d2fSBarry Smith 467*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 468*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 469*f1af5d2fSBarry Smith t[i] = b[c[i]]; 470*f1af5d2fSBarry Smith } 471*f1af5d2fSBarry Smith 472*f1af5d2fSBarry Smith /* forward solve the U^T */ 473*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 474*f1af5d2fSBarry Smith 475*f1af5d2fSBarry Smith v = aa + diag[i]; 476*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 477*f1af5d2fSBarry Smith s1 = (*v++)*t[i]; 478*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 479*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 480*f1af5d2fSBarry Smith while (nz--) { 481*f1af5d2fSBarry Smith t[*vi++] -= (*v++)*s1; 482*f1af5d2fSBarry Smith } 483*f1af5d2fSBarry Smith t[i] = s1; 484*f1af5d2fSBarry Smith } 485*f1af5d2fSBarry Smith /* backward solve the L^T */ 486*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 487*f1af5d2fSBarry Smith v = aa + diag[i] - 1; 488*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 489*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 490*f1af5d2fSBarry Smith s1 = t[i]; 491*f1af5d2fSBarry Smith while (nz--) { 492*f1af5d2fSBarry Smith t[*vi--] -= (*v--)*s1; 493*f1af5d2fSBarry Smith } 494*f1af5d2fSBarry Smith } 495*f1af5d2fSBarry Smith 496*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 497*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 498*f1af5d2fSBarry Smith x[r[i]] = t[i]; 499*f1af5d2fSBarry Smith } 500*f1af5d2fSBarry Smith 501*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 502*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 503*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 504*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 505*f1af5d2fSBarry Smith PLogFlops(2*(a->nz) - a->n); 506*f1af5d2fSBarry Smith PetscFunctionReturn(0); 507*f1af5d2fSBarry Smith } 508*f1af5d2fSBarry Smith 509*f1af5d2fSBarry Smith #undef __FUNC__ 510*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_2" 511*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 512*f1af5d2fSBarry Smith { 513*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 514*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 515*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 516*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 517*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 518*f1af5d2fSBarry Smith Scalar s1,s2,x1,x2; 519*f1af5d2fSBarry Smith Scalar *x,*b,*t; 520*f1af5d2fSBarry Smith 521*f1af5d2fSBarry Smith PetscFunctionBegin; 522*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 523*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 524*f1af5d2fSBarry Smith t = a->solve_work; 525*f1af5d2fSBarry Smith 526*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 527*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 528*f1af5d2fSBarry Smith 529*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 530*f1af5d2fSBarry Smith ii = 0; 531*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 532*f1af5d2fSBarry Smith ic = 2*c[i]; 533*f1af5d2fSBarry Smith t[ii] = b[ic]; 534*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 535*f1af5d2fSBarry Smith ii += 2; 536*f1af5d2fSBarry Smith } 537*f1af5d2fSBarry Smith 538*f1af5d2fSBarry Smith /* forward solve the U^T */ 539*f1af5d2fSBarry Smith idx = 0; 540*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 541*f1af5d2fSBarry Smith 542*f1af5d2fSBarry Smith v = aa + 4*diag[i]; 543*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 544*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 545*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2; 546*f1af5d2fSBarry Smith s2 = v[2]*x1 + v[3]*x2; 547*f1af5d2fSBarry Smith v += 4; 548*f1af5d2fSBarry Smith 549*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 550*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 551*f1af5d2fSBarry Smith while (nz--) { 552*f1af5d2fSBarry Smith oidx = 2*(*vi++); 553*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2; 554*f1af5d2fSBarry Smith t[oidx+1] -= v[2]*s1 + v[3]*s2; 555*f1af5d2fSBarry Smith v += 4; 556*f1af5d2fSBarry Smith } 557*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; 558*f1af5d2fSBarry Smith idx += 2; 559*f1af5d2fSBarry Smith } 560*f1af5d2fSBarry Smith /* backward solve the L^T */ 561*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 562*f1af5d2fSBarry Smith v = aa + 4*diag[i] - 4; 563*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 564*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 565*f1af5d2fSBarry Smith idt = 2*i; 566*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 567*f1af5d2fSBarry Smith while (nz--) { 568*f1af5d2fSBarry Smith idx = 2*(*vi--); 569*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2; 570*f1af5d2fSBarry Smith t[idx+1] -= v[2]*s1 + v[3]*s2; 571*f1af5d2fSBarry Smith v -= 4; 572*f1af5d2fSBarry Smith } 573*f1af5d2fSBarry Smith } 574*f1af5d2fSBarry Smith 575*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 576*f1af5d2fSBarry Smith ii = 0; 577*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 578*f1af5d2fSBarry Smith ir = 2*r[i]; 579*f1af5d2fSBarry Smith x[ir] = t[ii]; 580*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 581*f1af5d2fSBarry Smith ii += 2; 582*f1af5d2fSBarry Smith } 583*f1af5d2fSBarry Smith 584*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 585*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 586*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 587*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 588*f1af5d2fSBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 589*f1af5d2fSBarry Smith PetscFunctionReturn(0); 590*f1af5d2fSBarry Smith } 591*f1af5d2fSBarry Smith 592*f1af5d2fSBarry Smith #undef __FUNC__ 593*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_3" 594*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 595*f1af5d2fSBarry Smith { 596*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 597*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 598*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 599*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 600*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 601*f1af5d2fSBarry Smith Scalar s1,s2,s3,x1,x2,x3; 602*f1af5d2fSBarry Smith Scalar *x,*b,*t; 603*f1af5d2fSBarry Smith 604*f1af5d2fSBarry Smith PetscFunctionBegin; 605*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 606*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 607*f1af5d2fSBarry Smith t = a->solve_work; 608*f1af5d2fSBarry Smith 609*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 610*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 611*f1af5d2fSBarry Smith 612*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 613*f1af5d2fSBarry Smith ii = 0; 614*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 615*f1af5d2fSBarry Smith ic = 3*c[i]; 616*f1af5d2fSBarry Smith t[ii] = b[ic]; 617*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 618*f1af5d2fSBarry Smith t[ii+2] = b[ic+2]; 619*f1af5d2fSBarry Smith ii += 3; 620*f1af5d2fSBarry Smith } 621*f1af5d2fSBarry Smith 622*f1af5d2fSBarry Smith /* forward solve the U^T */ 623*f1af5d2fSBarry Smith idx = 0; 624*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 625*f1af5d2fSBarry Smith 626*f1af5d2fSBarry Smith v = aa + 9*diag[i]; 627*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 628*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 629*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3; 630*f1af5d2fSBarry Smith s2 = v[3]*x1 + v[4]*x2 + v[5]*x3; 631*f1af5d2fSBarry Smith s3 = v[6]*x1 + v[7]*x2 + v[8]*x3; 632*f1af5d2fSBarry Smith v += 9; 633*f1af5d2fSBarry Smith 634*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 635*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 636*f1af5d2fSBarry Smith while (nz--) { 637*f1af5d2fSBarry Smith oidx = 3*(*vi++); 638*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3; 639*f1af5d2fSBarry Smith t[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3; 640*f1af5d2fSBarry Smith t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3; 641*f1af5d2fSBarry Smith v += 9; 642*f1af5d2fSBarry Smith } 643*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3; 644*f1af5d2fSBarry Smith idx += 3; 645*f1af5d2fSBarry Smith } 646*f1af5d2fSBarry Smith /* backward solve the L^T */ 647*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 648*f1af5d2fSBarry Smith v = aa + 9*diag[i] - 9; 649*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 650*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 651*f1af5d2fSBarry Smith idt = 3*i; 652*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt]; 653*f1af5d2fSBarry Smith while (nz--) { 654*f1af5d2fSBarry Smith idx = 3*(*vi--); 655*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3; 656*f1af5d2fSBarry Smith t[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3; 657*f1af5d2fSBarry Smith t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3; 658*f1af5d2fSBarry Smith v -= 9; 659*f1af5d2fSBarry Smith } 660*f1af5d2fSBarry Smith } 661*f1af5d2fSBarry Smith 662*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 663*f1af5d2fSBarry Smith ii = 0; 664*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 665*f1af5d2fSBarry Smith ir = 3*r[i]; 666*f1af5d2fSBarry Smith x[ir] = t[ii]; 667*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 668*f1af5d2fSBarry Smith x[ir+2] = t[ii+2]; 669*f1af5d2fSBarry Smith ii += 3; 670*f1af5d2fSBarry Smith } 671*f1af5d2fSBarry Smith 672*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 673*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 674*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 675*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 676*f1af5d2fSBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 677*f1af5d2fSBarry Smith PetscFunctionReturn(0); 678*f1af5d2fSBarry Smith } 679*f1af5d2fSBarry Smith 680*f1af5d2fSBarry Smith #undef __FUNC__ 681*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_4" 682*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 683*f1af5d2fSBarry Smith { 684*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 685*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 686*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 687*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 688*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 689*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,x1,x2,x3,x4; 690*f1af5d2fSBarry Smith Scalar *x,*b,*t; 691*f1af5d2fSBarry Smith 692*f1af5d2fSBarry Smith PetscFunctionBegin; 693*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 694*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 695*f1af5d2fSBarry Smith t = a->solve_work; 696*f1af5d2fSBarry Smith 697*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 698*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 699*f1af5d2fSBarry Smith 700*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 701*f1af5d2fSBarry Smith ii = 0; 702*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 703*f1af5d2fSBarry Smith ic = 4*c[i]; 704*f1af5d2fSBarry Smith t[ii] = b[ic]; 705*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 706*f1af5d2fSBarry Smith t[ii+2] = b[ic+2]; 707*f1af5d2fSBarry Smith t[ii+3] = b[ic+3]; 708*f1af5d2fSBarry Smith ii += 4; 709*f1af5d2fSBarry Smith } 710*f1af5d2fSBarry Smith 711*f1af5d2fSBarry Smith /* forward solve the U^T */ 712*f1af5d2fSBarry Smith idx = 0; 713*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 714*f1af5d2fSBarry Smith 715*f1af5d2fSBarry Smith v = aa + 16*diag[i]; 716*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 717*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; 718*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 719*f1af5d2fSBarry Smith s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 720*f1af5d2fSBarry Smith s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 721*f1af5d2fSBarry Smith s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 722*f1af5d2fSBarry Smith v += 16; 723*f1af5d2fSBarry Smith 724*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 725*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 726*f1af5d2fSBarry Smith while (nz--) { 727*f1af5d2fSBarry Smith oidx = 4*(*vi++); 728*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4; 729*f1af5d2fSBarry Smith t[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4; 730*f1af5d2fSBarry Smith t[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4; 731*f1af5d2fSBarry Smith t[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4; 732*f1af5d2fSBarry Smith v += 16; 733*f1af5d2fSBarry Smith } 734*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; 735*f1af5d2fSBarry Smith idx += 4; 736*f1af5d2fSBarry Smith } 737*f1af5d2fSBarry Smith /* backward solve the L^T */ 738*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 739*f1af5d2fSBarry Smith v = aa + 16*diag[i] - 16; 740*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 741*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 742*f1af5d2fSBarry Smith idt = 4*i; 743*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; 744*f1af5d2fSBarry Smith while (nz--) { 745*f1af5d2fSBarry Smith idx = 4*(*vi--); 746*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4; 747*f1af5d2fSBarry Smith t[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4; 748*f1af5d2fSBarry Smith t[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4; 749*f1af5d2fSBarry Smith t[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4; 750*f1af5d2fSBarry Smith v -= 16; 751*f1af5d2fSBarry Smith } 752*f1af5d2fSBarry Smith } 753*f1af5d2fSBarry Smith 754*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 755*f1af5d2fSBarry Smith ii = 0; 756*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 757*f1af5d2fSBarry Smith ir = 4*r[i]; 758*f1af5d2fSBarry Smith x[ir] = t[ii]; 759*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 760*f1af5d2fSBarry Smith x[ir+2] = t[ii+2]; 761*f1af5d2fSBarry Smith x[ir+3] = t[ii+3]; 762*f1af5d2fSBarry Smith ii += 4; 763*f1af5d2fSBarry Smith } 764*f1af5d2fSBarry Smith 765*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 766*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 767*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 768*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 769*f1af5d2fSBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 770*f1af5d2fSBarry Smith PetscFunctionReturn(0); 771*f1af5d2fSBarry Smith } 772*f1af5d2fSBarry Smith 773*f1af5d2fSBarry Smith #undef __FUNC__ 774*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_5" 775*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 776*f1af5d2fSBarry Smith { 777*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 778*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 779*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 780*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 781*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 782*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5; 783*f1af5d2fSBarry Smith Scalar *x,*b,*t; 784*f1af5d2fSBarry Smith 785*f1af5d2fSBarry Smith PetscFunctionBegin; 786*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 787*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 788*f1af5d2fSBarry Smith t = a->solve_work; 789*f1af5d2fSBarry Smith 790*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 791*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 792*f1af5d2fSBarry Smith 793*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 794*f1af5d2fSBarry Smith ii = 0; 795*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 796*f1af5d2fSBarry Smith ic = 5*c[i]; 797*f1af5d2fSBarry Smith t[ii] = b[ic]; 798*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 799*f1af5d2fSBarry Smith t[ii+2] = b[ic+2]; 800*f1af5d2fSBarry Smith t[ii+3] = b[ic+3]; 801*f1af5d2fSBarry Smith t[ii+4] = b[ic+4]; 802*f1af5d2fSBarry Smith ii += 5; 803*f1af5d2fSBarry Smith } 804*f1af5d2fSBarry Smith 805*f1af5d2fSBarry Smith /* forward solve the U^T */ 806*f1af5d2fSBarry Smith idx = 0; 807*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 808*f1af5d2fSBarry Smith 809*f1af5d2fSBarry Smith v = aa + 25*diag[i]; 810*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 811*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 812*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 813*f1af5d2fSBarry Smith s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 814*f1af5d2fSBarry Smith s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 815*f1af5d2fSBarry Smith s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 816*f1af5d2fSBarry Smith s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 817*f1af5d2fSBarry Smith v += 25; 818*f1af5d2fSBarry Smith 819*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 820*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 821*f1af5d2fSBarry Smith while (nz--) { 822*f1af5d2fSBarry Smith oidx = 5*(*vi++); 823*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5; 824*f1af5d2fSBarry Smith t[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5; 825*f1af5d2fSBarry Smith t[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5; 826*f1af5d2fSBarry Smith t[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5; 827*f1af5d2fSBarry Smith t[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5; 828*f1af5d2fSBarry Smith v += 25; 829*f1af5d2fSBarry Smith } 830*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 831*f1af5d2fSBarry Smith idx += 5; 832*f1af5d2fSBarry Smith } 833*f1af5d2fSBarry Smith /* backward solve the L^T */ 834*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 835*f1af5d2fSBarry Smith v = aa + 25*diag[i] - 25; 836*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 837*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 838*f1af5d2fSBarry Smith idt = 5*i; 839*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 840*f1af5d2fSBarry Smith while (nz--) { 841*f1af5d2fSBarry Smith idx = 5*(*vi--); 842*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5; 843*f1af5d2fSBarry Smith t[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5; 844*f1af5d2fSBarry Smith t[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5; 845*f1af5d2fSBarry Smith t[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5; 846*f1af5d2fSBarry Smith t[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5; 847*f1af5d2fSBarry Smith v -= 25; 848*f1af5d2fSBarry Smith } 849*f1af5d2fSBarry Smith } 850*f1af5d2fSBarry Smith 851*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 852*f1af5d2fSBarry Smith ii = 0; 853*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 854*f1af5d2fSBarry Smith ir = 5*r[i]; 855*f1af5d2fSBarry Smith x[ir] = t[ii]; 856*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 857*f1af5d2fSBarry Smith x[ir+2] = t[ii+2]; 858*f1af5d2fSBarry Smith x[ir+3] = t[ii+3]; 859*f1af5d2fSBarry Smith x[ir+4] = t[ii+4]; 860*f1af5d2fSBarry Smith ii += 5; 861*f1af5d2fSBarry Smith } 862*f1af5d2fSBarry Smith 863*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 864*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 865*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 866*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 867*f1af5d2fSBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 868*f1af5d2fSBarry Smith PetscFunctionReturn(0); 869*f1af5d2fSBarry Smith } 870*f1af5d2fSBarry Smith 871*f1af5d2fSBarry Smith #undef __FUNC__ 872*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_6" 873*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 874*f1af5d2fSBarry Smith { 875*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 876*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 877*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 878*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 879*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 880*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6; 881*f1af5d2fSBarry Smith Scalar *x,*b,*t; 882*f1af5d2fSBarry Smith 883*f1af5d2fSBarry Smith PetscFunctionBegin; 884*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 885*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 886*f1af5d2fSBarry Smith t = a->solve_work; 887*f1af5d2fSBarry Smith 888*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 889*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 890*f1af5d2fSBarry Smith 891*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 892*f1af5d2fSBarry Smith ii = 0; 893*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 894*f1af5d2fSBarry Smith ic = 6*c[i]; 895*f1af5d2fSBarry Smith t[ii] = b[ic]; 896*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 897*f1af5d2fSBarry Smith t[ii+2] = b[ic+2]; 898*f1af5d2fSBarry Smith t[ii+3] = b[ic+3]; 899*f1af5d2fSBarry Smith t[ii+4] = b[ic+4]; 900*f1af5d2fSBarry Smith t[ii+5] = b[ic+5]; 901*f1af5d2fSBarry Smith ii += 6; 902*f1af5d2fSBarry Smith } 903*f1af5d2fSBarry Smith 904*f1af5d2fSBarry Smith /* forward solve the U^T */ 905*f1af5d2fSBarry Smith idx = 0; 906*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 907*f1af5d2fSBarry Smith 908*f1af5d2fSBarry Smith v = aa + 36*diag[i]; 909*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 910*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 911*f1af5d2fSBarry Smith x6 = t[5+idx]; 912*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6; 913*f1af5d2fSBarry Smith s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6; 914*f1af5d2fSBarry Smith s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6; 915*f1af5d2fSBarry Smith s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6; 916*f1af5d2fSBarry Smith s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6; 917*f1af5d2fSBarry Smith s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6; 918*f1af5d2fSBarry Smith v += 36; 919*f1af5d2fSBarry Smith 920*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 921*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 922*f1af5d2fSBarry Smith while (nz--) { 923*f1af5d2fSBarry Smith oidx = 6*(*vi++); 924*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6; 925*f1af5d2fSBarry Smith t[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6; 926*f1af5d2fSBarry Smith t[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6; 927*f1af5d2fSBarry Smith t[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6; 928*f1af5d2fSBarry Smith t[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6; 929*f1af5d2fSBarry Smith t[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6; 930*f1af5d2fSBarry Smith v += 36; 931*f1af5d2fSBarry Smith } 932*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 933*f1af5d2fSBarry Smith t[5+idx] = s6; 934*f1af5d2fSBarry Smith idx += 6; 935*f1af5d2fSBarry Smith } 936*f1af5d2fSBarry Smith /* backward solve the L^T */ 937*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 938*f1af5d2fSBarry Smith v = aa + 36*diag[i] - 36; 939*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 940*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 941*f1af5d2fSBarry Smith idt = 6*i; 942*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 943*f1af5d2fSBarry Smith s6 = t[5+idt]; 944*f1af5d2fSBarry Smith while (nz--) { 945*f1af5d2fSBarry Smith idx = 6*(*vi--); 946*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6; 947*f1af5d2fSBarry Smith t[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6; 948*f1af5d2fSBarry Smith t[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6; 949*f1af5d2fSBarry Smith t[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6; 950*f1af5d2fSBarry Smith t[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6; 951*f1af5d2fSBarry Smith t[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6; 952*f1af5d2fSBarry Smith v -= 36; 953*f1af5d2fSBarry Smith } 954*f1af5d2fSBarry Smith } 955*f1af5d2fSBarry Smith 956*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 957*f1af5d2fSBarry Smith ii = 0; 958*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 959*f1af5d2fSBarry Smith ir = 6*r[i]; 960*f1af5d2fSBarry Smith x[ir] = t[ii]; 961*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 962*f1af5d2fSBarry Smith x[ir+2] = t[ii+2]; 963*f1af5d2fSBarry Smith x[ir+3] = t[ii+3]; 964*f1af5d2fSBarry Smith x[ir+4] = t[ii+4]; 965*f1af5d2fSBarry Smith x[ir+5] = t[ii+5]; 966*f1af5d2fSBarry Smith ii += 6; 967*f1af5d2fSBarry Smith } 968*f1af5d2fSBarry Smith 969*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 970*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 971*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 972*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 973*f1af5d2fSBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 974*f1af5d2fSBarry Smith PetscFunctionReturn(0); 975*f1af5d2fSBarry Smith } 976*f1af5d2fSBarry Smith 977*f1af5d2fSBarry Smith #undef __FUNC__ 978*f1af5d2fSBarry Smith #define __FUNC__ "MatSolveTrans_SeqBAIJ_7" 979*f1af5d2fSBarry Smith int MatSolveTrans_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 980*f1af5d2fSBarry Smith { 981*f1af5d2fSBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 982*f1af5d2fSBarry Smith IS iscol=a->col,isrow=a->row; 983*f1af5d2fSBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout; 984*f1af5d2fSBarry Smith int *diag = a->diag,ii,ic,ir,oidx; 985*f1af5d2fSBarry Smith MatScalar *aa=a->a,*v; 986*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7; 987*f1af5d2fSBarry Smith Scalar *x,*b,*t; 988*f1af5d2fSBarry Smith 989*f1af5d2fSBarry Smith PetscFunctionBegin; 990*f1af5d2fSBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 991*f1af5d2fSBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 992*f1af5d2fSBarry Smith t = a->solve_work; 993*f1af5d2fSBarry Smith 994*f1af5d2fSBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 995*f1af5d2fSBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 996*f1af5d2fSBarry Smith 997*f1af5d2fSBarry Smith /* copy the b into temp work space according to permutation */ 998*f1af5d2fSBarry Smith ii = 0; 999*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 1000*f1af5d2fSBarry Smith ic = 7*c[i]; 1001*f1af5d2fSBarry Smith t[ii] = b[ic]; 1002*f1af5d2fSBarry Smith t[ii+1] = b[ic+1]; 1003*f1af5d2fSBarry Smith t[ii+2] = b[ic+2]; 1004*f1af5d2fSBarry Smith t[ii+3] = b[ic+3]; 1005*f1af5d2fSBarry Smith t[ii+4] = b[ic+4]; 1006*f1af5d2fSBarry Smith t[ii+5] = b[ic+5]; 1007*f1af5d2fSBarry Smith t[ii+6] = b[ic+6]; 1008*f1af5d2fSBarry Smith ii += 7; 1009*f1af5d2fSBarry Smith } 1010*f1af5d2fSBarry Smith 1011*f1af5d2fSBarry Smith /* forward solve the U^T */ 1012*f1af5d2fSBarry Smith idx = 0; 1013*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 1014*f1af5d2fSBarry Smith 1015*f1af5d2fSBarry Smith v = aa + 49*diag[i]; 1016*f1af5d2fSBarry Smith /* multiply by the inverse of the block diagonal */ 1017*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 1018*f1af5d2fSBarry Smith x6 = t[5+idx]; x7 = t[6+idx]; 1019*f1af5d2fSBarry Smith s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7; 1020*f1af5d2fSBarry Smith s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7; 1021*f1af5d2fSBarry Smith s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7; 1022*f1af5d2fSBarry Smith s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7; 1023*f1af5d2fSBarry Smith s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7; 1024*f1af5d2fSBarry Smith s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7; 1025*f1af5d2fSBarry Smith s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7; 1026*f1af5d2fSBarry Smith v += 49; 1027*f1af5d2fSBarry Smith 1028*f1af5d2fSBarry Smith vi = aj + diag[i] + 1; 1029*f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 1030*f1af5d2fSBarry Smith while (nz--) { 1031*f1af5d2fSBarry Smith oidx = 7*(*vi++); 1032*f1af5d2fSBarry Smith t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7; 1033*f1af5d2fSBarry Smith t[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7; 1034*f1af5d2fSBarry Smith t[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7; 1035*f1af5d2fSBarry Smith t[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7; 1036*f1af5d2fSBarry Smith t[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7; 1037*f1af5d2fSBarry Smith t[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7; 1038*f1af5d2fSBarry Smith t[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7; 1039*f1af5d2fSBarry Smith v += 49; 1040*f1af5d2fSBarry Smith } 1041*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 1042*f1af5d2fSBarry Smith t[5+idx] = s6;t[6+idx] = s7; 1043*f1af5d2fSBarry Smith idx += 7; 1044*f1af5d2fSBarry Smith } 1045*f1af5d2fSBarry Smith /* backward solve the L^T */ 1046*f1af5d2fSBarry Smith for ( i=n-1; i>=0; i-- ){ 1047*f1af5d2fSBarry Smith v = aa + 49*diag[i] - 49; 1048*f1af5d2fSBarry Smith vi = aj + diag[i] - 1; 1049*f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1050*f1af5d2fSBarry Smith idt = 7*i; 1051*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 1052*f1af5d2fSBarry Smith s6 = t[5+idt];s7 = t[6+idt]; 1053*f1af5d2fSBarry Smith while (nz--) { 1054*f1af5d2fSBarry Smith idx = 7*(*vi--); 1055*f1af5d2fSBarry Smith t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7; 1056*f1af5d2fSBarry Smith t[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7; 1057*f1af5d2fSBarry Smith t[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7; 1058*f1af5d2fSBarry Smith t[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7; 1059*f1af5d2fSBarry Smith t[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7; 1060*f1af5d2fSBarry Smith t[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7; 1061*f1af5d2fSBarry Smith t[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7; 1062*f1af5d2fSBarry Smith v -= 49; 1063*f1af5d2fSBarry Smith } 1064*f1af5d2fSBarry Smith } 1065*f1af5d2fSBarry Smith 1066*f1af5d2fSBarry Smith /* copy t into x according to permutation */ 1067*f1af5d2fSBarry Smith ii = 0; 1068*f1af5d2fSBarry Smith for ( i=0; i<n; i++ ) { 1069*f1af5d2fSBarry Smith ir = 7*r[i]; 1070*f1af5d2fSBarry Smith x[ir] = t[ii]; 1071*f1af5d2fSBarry Smith x[ir+1] = t[ii+1]; 1072*f1af5d2fSBarry Smith x[ir+2] = t[ii+2]; 1073*f1af5d2fSBarry Smith x[ir+3] = t[ii+3]; 1074*f1af5d2fSBarry Smith x[ir+4] = t[ii+4]; 1075*f1af5d2fSBarry Smith x[ir+5] = t[ii+5]; 1076*f1af5d2fSBarry Smith x[ir+6] = t[ii+6]; 1077*f1af5d2fSBarry Smith ii += 7; 1078*f1af5d2fSBarry Smith } 1079*f1af5d2fSBarry Smith 1080*f1af5d2fSBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1081*f1af5d2fSBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1082*f1af5d2fSBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1083*f1af5d2fSBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1084*f1af5d2fSBarry Smith PLogFlops(2*49*(a->nz) - 7*a->n); 1085*f1af5d2fSBarry Smith PetscFunctionReturn(0); 1086*f1af5d2fSBarry Smith } 1087*f1af5d2fSBarry Smith 10884e2b4712SSatish Balay /* ----------------------------------------------------------- */ 10894e2b4712SSatish Balay #undef __FUNC__ 10904e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_N" 10914e2b4712SSatish Balay int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 10924e2b4712SSatish Balay { 10934e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 10944e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 10954e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j; 10964e2b4712SSatish Balay int nz,bs=a->bs,bs2=a->bs2,*rout,*cout; 10973f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1098*f1af5d2fSBarry Smith Scalar *x,*b,*s,*t,*ls; 10994e2b4712SSatish Balay 11004e2b4712SSatish Balay PetscFunctionBegin; 1101e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1102e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1103*f1af5d2fSBarry Smith t = a->solve_work; 11044e2b4712SSatish Balay 11054e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 11064e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 11074e2b4712SSatish Balay 11084e2b4712SSatish Balay /* forward solve the lower triangular */ 1109*f1af5d2fSBarry Smith ierr = PetscMemcpy(t,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 11104e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 11114e2b4712SSatish Balay v = aa + bs2*ai[i]; 11124e2b4712SSatish Balay vi = aj + ai[i]; 11134e2b4712SSatish Balay nz = a->diag[i] - ai[i]; 1114*f1af5d2fSBarry Smith s = t + bs*i; 1115*f1af5d2fSBarry Smith ierr = PetscMemcpy(s,b+bs*(*r++),bs*sizeof(Scalar));CHKERRQ(ierr); 11164e2b4712SSatish Balay while (nz--) { 1117*f1af5d2fSBarry Smith Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*(*vi++)); 11184e2b4712SSatish Balay v += bs2; 11194e2b4712SSatish Balay } 11204e2b4712SSatish Balay } 11214e2b4712SSatish Balay /* backward solve the upper triangular */ 1122*f1af5d2fSBarry Smith ls = a->solve_work + a->n; 11234e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 11244e2b4712SSatish Balay v = aa + bs2*(a->diag[i] + 1); 11254e2b4712SSatish Balay vi = aj + a->diag[i] + 1; 11264e2b4712SSatish Balay nz = ai[i+1] - a->diag[i] - 1; 1127*f1af5d2fSBarry Smith ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 11284e2b4712SSatish Balay while (nz--) { 1129*f1af5d2fSBarry Smith Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*(*vi++)); 11304e2b4712SSatish Balay v += bs2; 11314e2b4712SSatish Balay } 1132*f1af5d2fSBarry Smith Kernel_w_gets_A_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs); 1133*f1af5d2fSBarry Smith ierr = PetscMemcpy(x + bs*(*c--),t+i*bs,bs*sizeof(Scalar));CHKERRQ(ierr); 11344e2b4712SSatish Balay } 11354e2b4712SSatish Balay 11364e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 11374e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1138e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1139e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 114042015d63SBarry Smith PLogFlops(2*(a->bs2)*(a->nz) - a->bs*a->n); 11414e2b4712SSatish Balay PetscFunctionReturn(0); 11424e2b4712SSatish Balay } 11434e2b4712SSatish Balay 11444e2b4712SSatish Balay #undef __FUNC__ 11454e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_7" 11464e2b4712SSatish Balay int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx) 11474e2b4712SSatish Balay { 11484e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 11494e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 11504e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 11514e2b4712SSatish Balay int *diag = a->diag; 11523f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1153*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7; 1154*f1af5d2fSBarry Smith Scalar *x,*b,*t; 11554e2b4712SSatish Balay 11564e2b4712SSatish Balay PetscFunctionBegin; 1157e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159*f1af5d2fSBarry Smith t = a->solve_work; 11604e2b4712SSatish Balay 11614e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 11624e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 11634e2b4712SSatish Balay 11644e2b4712SSatish Balay /* forward solve the lower triangular */ 11654e2b4712SSatish Balay idx = 7*(*r++); 1166*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; 1167*f1af5d2fSBarry Smith t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 1168*f1af5d2fSBarry Smith t[5] = b[5+idx]; t[6] = b[6+idx]; 11694e2b4712SSatish Balay 11704e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 11714e2b4712SSatish Balay v = aa + 49*ai[i]; 11724e2b4712SSatish Balay vi = aj + ai[i]; 11734e2b4712SSatish Balay nz = diag[i] - ai[i]; 11744e2b4712SSatish Balay idx = 7*(*r++); 1175*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 1176*f1af5d2fSBarry Smith s5 = b[4+idx];s6 = b[5+idx];s7 = b[6+idx]; 11774e2b4712SSatish Balay while (nz--) { 11784e2b4712SSatish Balay idx = 7*(*vi++); 1179*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 1180*f1af5d2fSBarry Smith x4 = t[3+idx];x5 = t[4+idx]; 1181*f1af5d2fSBarry Smith x6 = t[5+idx];x7 = t[6+idx]; 1182*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1183*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1184*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1185*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1186*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1187*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1188*f1af5d2fSBarry Smith s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 11894e2b4712SSatish Balay v += 49; 11904e2b4712SSatish Balay } 11914e2b4712SSatish Balay idx = 7*i; 1192*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; 1193*f1af5d2fSBarry Smith t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 1194*f1af5d2fSBarry Smith t[5+idx] = s6;t[6+idx] = s7; 11954e2b4712SSatish Balay } 11964e2b4712SSatish Balay /* backward solve the upper triangular */ 11974e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 11984e2b4712SSatish Balay v = aa + 49*diag[i] + 49; 11994e2b4712SSatish Balay vi = aj + diag[i] + 1; 12004e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 12014e2b4712SSatish Balay idt = 7*i; 1202*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 1203*f1af5d2fSBarry Smith s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 1204*f1af5d2fSBarry Smith s6 = t[5+idt];s7 = t[6+idt]; 12054e2b4712SSatish Balay while (nz--) { 12064e2b4712SSatish Balay idx = 7*(*vi++); 1207*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 1208*f1af5d2fSBarry Smith x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 1209*f1af5d2fSBarry Smith x6 = t[5+idx]; x7 = t[6+idx]; 1210*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1211*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1212*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1213*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1214*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1215*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1216*f1af5d2fSBarry Smith s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 12174e2b4712SSatish Balay v += 49; 12184e2b4712SSatish Balay } 12194e2b4712SSatish Balay idc = 7*(*c--); 12204e2b4712SSatish Balay v = aa + 49*diag[i]; 1221*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1+v[7]*s2+v[14]*s3+ 1222*f1af5d2fSBarry Smith v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7; 1223*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+ 1224*f1af5d2fSBarry Smith v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7; 1225*f1af5d2fSBarry Smith x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+ 1226*f1af5d2fSBarry Smith v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7; 1227*f1af5d2fSBarry Smith x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+ 1228*f1af5d2fSBarry Smith v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7; 1229*f1af5d2fSBarry Smith x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+ 1230*f1af5d2fSBarry Smith v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7; 1231*f1af5d2fSBarry Smith x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+ 1232*f1af5d2fSBarry Smith v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7; 1233*f1af5d2fSBarry Smith x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+ 1234*f1af5d2fSBarry Smith v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7; 12354e2b4712SSatish Balay } 12364e2b4712SSatish Balay 12374e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 12384e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1239e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1240e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 124142015d63SBarry Smith PLogFlops(2*49*(a->nz) - 7*a->n); 12424e2b4712SSatish Balay PetscFunctionReturn(0); 12434e2b4712SSatish Balay } 12444e2b4712SSatish Balay 12454e2b4712SSatish Balay #undef __FUNC__ 124615091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_7_NaturalOrdering" 124715091d37SBarry Smith int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx) 124815091d37SBarry Smith { 124915091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 125015091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 125115091d37SBarry Smith int ierr,*diag = a->diag,jdx; 125215091d37SBarry Smith MatScalar *aa=a->a,*v; 1253*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7; 125415091d37SBarry Smith 125515091d37SBarry Smith PetscFunctionBegin; 125615091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 125715091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 125815091d37SBarry Smith /* forward solve the lower triangular */ 125915091d37SBarry Smith idx = 0; 126015091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 126115091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 126215091d37SBarry Smith x[6] = b[6+idx]; 126315091d37SBarry Smith for ( i=1; i<n; i++ ) { 126415091d37SBarry Smith v = aa + 49*ai[i]; 126515091d37SBarry Smith vi = aj + ai[i]; 126615091d37SBarry Smith nz = diag[i] - ai[i]; 126715091d37SBarry Smith idx = 7*i; 1268*f1af5d2fSBarry Smith s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx]; 1269*f1af5d2fSBarry Smith s4 = b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx]; 1270*f1af5d2fSBarry Smith s7 = b[6+idx]; 127115091d37SBarry Smith while (nz--) { 127215091d37SBarry Smith jdx = 7*(*vi++); 127315091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 127415091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 127515091d37SBarry Smith x7 = x[6+jdx]; 1276*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1277*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1278*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1279*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1280*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1281*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1282*f1af5d2fSBarry Smith s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 128315091d37SBarry Smith v += 49; 128415091d37SBarry Smith } 1285*f1af5d2fSBarry Smith x[idx] = s1; 1286*f1af5d2fSBarry Smith x[1+idx] = s2; 1287*f1af5d2fSBarry Smith x[2+idx] = s3; 1288*f1af5d2fSBarry Smith x[3+idx] = s4; 1289*f1af5d2fSBarry Smith x[4+idx] = s5; 1290*f1af5d2fSBarry Smith x[5+idx] = s6; 1291*f1af5d2fSBarry Smith x[6+idx] = s7; 129215091d37SBarry Smith } 129315091d37SBarry Smith /* backward solve the upper triangular */ 129415091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 129515091d37SBarry Smith v = aa + 49*diag[i] + 49; 129615091d37SBarry Smith vi = aj + diag[i] + 1; 129715091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 129815091d37SBarry Smith idt = 7*i; 1299*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 1300*f1af5d2fSBarry Smith s3 = x[2+idt]; s4 = x[3+idt]; 1301*f1af5d2fSBarry Smith s5 = x[4+idt]; s6 = x[5+idt]; 1302*f1af5d2fSBarry Smith s7 = x[6+idt]; 130315091d37SBarry Smith while (nz--) { 130415091d37SBarry Smith idx = 7*(*vi++); 130515091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 130615091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 130715091d37SBarry Smith x7 = x[6+idx]; 1308*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1309*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1310*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1311*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1312*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1313*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1314*f1af5d2fSBarry Smith s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 131515091d37SBarry Smith v += 49; 131615091d37SBarry Smith } 131715091d37SBarry Smith v = aa + 49*diag[i]; 1318*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[7]*s2 + v[14]*s3 + v[21]*s4 1319*f1af5d2fSBarry Smith + v[28]*s5 + v[35]*s6 + v[42]*s7; 1320*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[8]*s2 + v[15]*s3 + v[22]*s4 1321*f1af5d2fSBarry Smith + v[29]*s5 + v[36]*s6 + v[43]*s7; 1322*f1af5d2fSBarry Smith x[2+idt] = v[2]*s1 + v[9]*s2 + v[16]*s3 + v[23]*s4 1323*f1af5d2fSBarry Smith + v[30]*s5 + v[37]*s6 + v[44]*s7; 1324*f1af5d2fSBarry Smith x[3+idt] = v[3]*s1 + v[10]*s2 + v[17]*s3 + v[24]*s4 1325*f1af5d2fSBarry Smith + v[31]*s5 + v[38]*s6 + v[45]*s7; 1326*f1af5d2fSBarry Smith x[4+idt] = v[4]*s1 + v[11]*s2 + v[18]*s3 + v[25]*s4 1327*f1af5d2fSBarry Smith + v[32]*s5 + v[39]*s6 + v[46]*s7; 1328*f1af5d2fSBarry Smith x[5+idt] = v[5]*s1 + v[12]*s2 + v[19]*s3 + v[26]*s4 1329*f1af5d2fSBarry Smith + v[33]*s5 + v[40]*s6 + v[47]*s7; 1330*f1af5d2fSBarry Smith x[6+idt] = v[6]*s1 + v[13]*s2 + v[20]*s3 + v[27]*s4 1331*f1af5d2fSBarry Smith + v[34]*s5 + v[41]*s6 + v[48]*s7; 133215091d37SBarry Smith } 133315091d37SBarry Smith 133415091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 133515091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 133615091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 133715091d37SBarry Smith PetscFunctionReturn(0); 133815091d37SBarry Smith } 133915091d37SBarry Smith 134015091d37SBarry Smith #undef __FUNC__ 134115091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6" 134215091d37SBarry Smith int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx) 134315091d37SBarry Smith { 134415091d37SBarry Smith Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 134515091d37SBarry Smith IS iscol=a->col,isrow=a->row; 134615091d37SBarry Smith int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 134715091d37SBarry Smith int *diag = a->diag; 134815091d37SBarry Smith MatScalar *aa=a->a,*v; 1349*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t; 135015091d37SBarry Smith 135115091d37SBarry Smith PetscFunctionBegin; 135215091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 135315091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1354*f1af5d2fSBarry Smith t = a->solve_work; 135515091d37SBarry Smith 135615091d37SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 135715091d37SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 135815091d37SBarry Smith 135915091d37SBarry Smith /* forward solve the lower triangular */ 136015091d37SBarry Smith idx = 6*(*r++); 1361*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; 1362*f1af5d2fSBarry Smith t[2] = b[2+idx]; t[3] = b[3+idx]; 1363*f1af5d2fSBarry Smith t[4] = b[4+idx]; t[5] = b[5+idx]; 136415091d37SBarry Smith for ( i=1; i<n; i++ ) { 136515091d37SBarry Smith v = aa + 36*ai[i]; 136615091d37SBarry Smith vi = aj + ai[i]; 136715091d37SBarry Smith nz = diag[i] - ai[i]; 136815091d37SBarry Smith idx = 6*(*r++); 1369*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 1370*f1af5d2fSBarry Smith s5 = b[4+idx]; s6 = b[5+idx]; 137115091d37SBarry Smith while (nz--) { 137215091d37SBarry Smith idx = 6*(*vi++); 1373*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 1374*f1af5d2fSBarry Smith x4 = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx]; 1375*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1376*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1377*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1378*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1379*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1380*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 138115091d37SBarry Smith v += 36; 138215091d37SBarry Smith } 138315091d37SBarry Smith idx = 6*i; 1384*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; 1385*f1af5d2fSBarry Smith t[2+idx] = s3;t[3+idx] = s4; 1386*f1af5d2fSBarry Smith t[4+idx] = s5;t[5+idx] = s6; 138715091d37SBarry Smith } 138815091d37SBarry Smith /* backward solve the upper triangular */ 138915091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 139015091d37SBarry Smith v = aa + 36*diag[i] + 36; 139115091d37SBarry Smith vi = aj + diag[i] + 1; 139215091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 139315091d37SBarry Smith idt = 6*i; 1394*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 1395*f1af5d2fSBarry Smith s3 = t[2+idt];s4 = t[3+idt]; 1396*f1af5d2fSBarry Smith s5 = t[4+idt];s6 = t[5+idt]; 139715091d37SBarry Smith while (nz--) { 139815091d37SBarry Smith idx = 6*(*vi++); 1399*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 1400*f1af5d2fSBarry Smith x3 = t[2+idx]; x4 = t[3+idx]; 1401*f1af5d2fSBarry Smith x5 = t[4+idx]; x6 = t[5+idx]; 1402*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1403*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1404*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1405*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1406*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1407*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 140815091d37SBarry Smith v += 36; 140915091d37SBarry Smith } 141015091d37SBarry Smith idc = 6*(*c--); 141115091d37SBarry Smith v = aa + 36*diag[i]; 1412*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1+v[6]*s2+v[12]*s3+ 1413*f1af5d2fSBarry Smith v[18]*s4+v[24]*s5+v[30]*s6; 1414*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+ 1415*f1af5d2fSBarry Smith v[19]*s4+v[25]*s5+v[31]*s6; 1416*f1af5d2fSBarry Smith x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+ 1417*f1af5d2fSBarry Smith v[20]*s4+v[26]*s5+v[32]*s6; 1418*f1af5d2fSBarry Smith x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+ 1419*f1af5d2fSBarry Smith v[21]*s4+v[27]*s5+v[33]*s6; 1420*f1af5d2fSBarry Smith x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+ 1421*f1af5d2fSBarry Smith v[22]*s4+v[28]*s5+v[34]*s6; 1422*f1af5d2fSBarry Smith x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+ 1423*f1af5d2fSBarry Smith v[23]*s4+v[29]*s5+v[35]*s6; 142415091d37SBarry Smith } 142515091d37SBarry Smith 142615091d37SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 142715091d37SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 142815091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 142915091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 143015091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 143115091d37SBarry Smith PetscFunctionReturn(0); 143215091d37SBarry Smith } 143315091d37SBarry Smith 143415091d37SBarry Smith #undef __FUNC__ 143515091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_6_NaturalOrdering" 143615091d37SBarry Smith int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx) 143715091d37SBarry Smith { 143815091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 143915091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 144015091d37SBarry Smith int ierr,*diag = a->diag,jdx; 144115091d37SBarry Smith MatScalar *aa=a->a,*v; 1442*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6; 144315091d37SBarry Smith 144415091d37SBarry Smith PetscFunctionBegin; 144515091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 144615091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 144715091d37SBarry Smith /* forward solve the lower triangular */ 144815091d37SBarry Smith idx = 0; 144915091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; 145015091d37SBarry Smith x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx]; 145115091d37SBarry Smith for ( i=1; i<n; i++ ) { 145215091d37SBarry Smith v = aa + 36*ai[i]; 145315091d37SBarry Smith vi = aj + ai[i]; 145415091d37SBarry Smith nz = diag[i] - ai[i]; 145515091d37SBarry Smith idx = 6*i; 1456*f1af5d2fSBarry Smith s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx]; 1457*f1af5d2fSBarry Smith s4 = b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx]; 145815091d37SBarry Smith while (nz--) { 145915091d37SBarry Smith jdx = 6*(*vi++); 146015091d37SBarry Smith x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx]; 146115091d37SBarry Smith x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx]; 1462*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1463*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1464*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1465*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1466*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1467*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 146815091d37SBarry Smith v += 36; 146915091d37SBarry Smith } 1470*f1af5d2fSBarry Smith x[idx] = s1; 1471*f1af5d2fSBarry Smith x[1+idx] = s2; 1472*f1af5d2fSBarry Smith x[2+idx] = s3; 1473*f1af5d2fSBarry Smith x[3+idx] = s4; 1474*f1af5d2fSBarry Smith x[4+idx] = s5; 1475*f1af5d2fSBarry Smith x[5+idx] = s6; 147615091d37SBarry Smith } 147715091d37SBarry Smith /* backward solve the upper triangular */ 147815091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 147915091d37SBarry Smith v = aa + 36*diag[i] + 36; 148015091d37SBarry Smith vi = aj + diag[i] + 1; 148115091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 148215091d37SBarry Smith idt = 6*i; 1483*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 1484*f1af5d2fSBarry Smith s3 = x[2+idt]; s4 = x[3+idt]; 1485*f1af5d2fSBarry Smith s5 = x[4+idt]; s6 = x[5+idt]; 148615091d37SBarry Smith while (nz--) { 148715091d37SBarry Smith idx = 6*(*vi++); 148815091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 148915091d37SBarry Smith x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 1490*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1491*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1492*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1493*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1494*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1495*f1af5d2fSBarry Smith s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 149615091d37SBarry Smith v += 36; 149715091d37SBarry Smith } 149815091d37SBarry Smith v = aa + 36*diag[i]; 1499*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[6]*s2 + v[12]*s3 + v[18]*s4 + v[24]*s5 + v[30]*s6; 1500*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[7]*s2 + v[13]*s3 + v[19]*s4 + v[25]*s5 + v[31]*s6; 1501*f1af5d2fSBarry Smith x[2+idt] = v[2]*s1 + v[8]*s2 + v[14]*s3 + v[20]*s4 + v[26]*s5 + v[32]*s6; 1502*f1af5d2fSBarry Smith x[3+idt] = v[3]*s1 + v[9]*s2 + v[15]*s3 + v[21]*s4 + v[27]*s5 + v[33]*s6; 1503*f1af5d2fSBarry Smith x[4+idt] = v[4]*s1 + v[10]*s2 + v[16]*s3 + v[22]*s4 + v[28]*s5 + v[34]*s6; 1504*f1af5d2fSBarry Smith x[5+idt] = v[5]*s1 + v[11]*s2 + v[17]*s3 + v[23]*s4 + v[29]*s5 + v[35]*s6; 150515091d37SBarry Smith } 150615091d37SBarry Smith 150715091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 150815091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 150915091d37SBarry Smith PLogFlops(2*36*(a->nz) - 6*a->n); 151015091d37SBarry Smith PetscFunctionReturn(0); 151115091d37SBarry Smith } 151215091d37SBarry Smith 151315091d37SBarry Smith #undef __FUNC__ 15144e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_5" 15154e2b4712SSatish Balay int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx) 15164e2b4712SSatish Balay { 15174e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 15184e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 15194e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 15204e2b4712SSatish Balay int *diag = a->diag; 15213f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1522*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t; 15234e2b4712SSatish Balay 15244e2b4712SSatish Balay PetscFunctionBegin; 1525e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1526e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1527*f1af5d2fSBarry Smith t = a->solve_work; 15284e2b4712SSatish Balay 15294e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 15304e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 15314e2b4712SSatish Balay 15324e2b4712SSatish Balay /* forward solve the lower triangular */ 15334e2b4712SSatish Balay idx = 5*(*r++); 1534*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; 1535*f1af5d2fSBarry Smith t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx]; 15364e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 15374e2b4712SSatish Balay v = aa + 25*ai[i]; 15384e2b4712SSatish Balay vi = aj + ai[i]; 15394e2b4712SSatish Balay nz = diag[i] - ai[i]; 15404e2b4712SSatish Balay idx = 5*(*r++); 1541*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 1542*f1af5d2fSBarry Smith s5 = b[4+idx]; 15434e2b4712SSatish Balay while (nz--) { 15444e2b4712SSatish Balay idx = 5*(*vi++); 1545*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx]; 1546*f1af5d2fSBarry Smith x4 = t[3+idx];x5 = t[4+idx]; 1547*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1548*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1549*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1550*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1551*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 15524e2b4712SSatish Balay v += 25; 15534e2b4712SSatish Balay } 15544e2b4712SSatish Balay idx = 5*i; 1555*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; 1556*f1af5d2fSBarry Smith t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5; 15574e2b4712SSatish Balay } 15584e2b4712SSatish Balay /* backward solve the upper triangular */ 15594e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 15604e2b4712SSatish Balay v = aa + 25*diag[i] + 25; 15614e2b4712SSatish Balay vi = aj + diag[i] + 1; 15624e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 15634e2b4712SSatish Balay idt = 5*i; 1564*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 1565*f1af5d2fSBarry Smith s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt]; 15664e2b4712SSatish Balay while (nz--) { 15674e2b4712SSatish Balay idx = 5*(*vi++); 1568*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 1569*f1af5d2fSBarry Smith x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx]; 1570*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1571*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1572*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1573*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1574*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 15754e2b4712SSatish Balay v += 25; 15764e2b4712SSatish Balay } 15774e2b4712SSatish Balay idc = 5*(*c--); 15784e2b4712SSatish Balay v = aa + 25*diag[i]; 1579*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1+v[5]*s2+v[10]*s3+ 1580*f1af5d2fSBarry Smith v[15]*s4+v[20]*s5; 1581*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+ 1582*f1af5d2fSBarry Smith v[16]*s4+v[21]*s5; 1583*f1af5d2fSBarry Smith x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+ 1584*f1af5d2fSBarry Smith v[17]*s4+v[22]*s5; 1585*f1af5d2fSBarry Smith x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+ 1586*f1af5d2fSBarry Smith v[18]*s4+v[23]*s5; 1587*f1af5d2fSBarry Smith x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+ 1588*f1af5d2fSBarry Smith v[19]*s4+v[24]*s5; 15894e2b4712SSatish Balay } 15904e2b4712SSatish Balay 15914e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 15924e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1593e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1594e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 159542015d63SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 15964e2b4712SSatish Balay PetscFunctionReturn(0); 15974e2b4712SSatish Balay } 15984e2b4712SSatish Balay 15994e2b4712SSatish Balay #undef __FUNC__ 160015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_5_NaturalOrdering" 160115091d37SBarry Smith int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx) 160215091d37SBarry Smith { 160315091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 160415091d37SBarry Smith int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt; 160515091d37SBarry Smith int ierr,*diag = a->diag,jdx; 160615091d37SBarry Smith MatScalar *aa=a->a,*v; 1607*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;; 160815091d37SBarry Smith 160915091d37SBarry Smith PetscFunctionBegin; 161015091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 161115091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 161215091d37SBarry Smith /* forward solve the lower triangular */ 161315091d37SBarry Smith idx = 0; 161415091d37SBarry Smith x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx]; 161515091d37SBarry Smith for ( i=1; i<n; i++ ) { 161615091d37SBarry Smith v = aa + 25*ai[i]; 161715091d37SBarry Smith vi = aj + ai[i]; 161815091d37SBarry Smith nz = diag[i] - ai[i]; 161915091d37SBarry Smith idx = 5*i; 1620*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx]; 162115091d37SBarry Smith while (nz--) { 162215091d37SBarry Smith jdx = 5*(*vi++); 162315091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx]; 1624*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1625*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1626*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1627*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1628*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 162915091d37SBarry Smith v += 25; 163015091d37SBarry Smith } 1631*f1af5d2fSBarry Smith x[idx] = s1; 1632*f1af5d2fSBarry Smith x[1+idx] = s2; 1633*f1af5d2fSBarry Smith x[2+idx] = s3; 1634*f1af5d2fSBarry Smith x[3+idx] = s4; 1635*f1af5d2fSBarry Smith x[4+idx] = s5; 163615091d37SBarry Smith } 163715091d37SBarry Smith /* backward solve the upper triangular */ 163815091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 163915091d37SBarry Smith v = aa + 25*diag[i] + 25; 164015091d37SBarry Smith vi = aj + diag[i] + 1; 164115091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 164215091d37SBarry Smith idt = 5*i; 1643*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 1644*f1af5d2fSBarry Smith s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt]; 164515091d37SBarry Smith while (nz--) { 164615091d37SBarry Smith idx = 5*(*vi++); 164715091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 1648*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1649*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1650*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1651*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1652*f1af5d2fSBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 165315091d37SBarry Smith v += 25; 165415091d37SBarry Smith } 165515091d37SBarry Smith v = aa + 25*diag[i]; 1656*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[5]*s2 + v[10]*s3 + v[15]*s4 + v[20]*s5; 1657*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3 + v[16]*s4 + v[21]*s5; 1658*f1af5d2fSBarry Smith x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3 + v[17]*s4 + v[22]*s5; 1659*f1af5d2fSBarry Smith x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3 + v[18]*s4 + v[23]*s5; 1660*f1af5d2fSBarry Smith x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3 + v[19]*s4 + v[24]*s5; 166115091d37SBarry Smith } 166215091d37SBarry Smith 166315091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 166415091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 166515091d37SBarry Smith PLogFlops(2*25*(a->nz) - 5*a->n); 166615091d37SBarry Smith PetscFunctionReturn(0); 166715091d37SBarry Smith } 166815091d37SBarry Smith 166915091d37SBarry Smith #undef __FUNC__ 16704e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4" 16714e2b4712SSatish Balay int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx) 16724e2b4712SSatish Balay { 16734e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 16744e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 16754e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 16764e2b4712SSatish Balay int *diag = a->diag; 16773f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1678*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,s4,x1,x2,x3,x4,*t; 16794e2b4712SSatish Balay 16804e2b4712SSatish Balay PetscFunctionBegin; 1681e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1682e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1683*f1af5d2fSBarry Smith t = a->solve_work; 16844e2b4712SSatish Balay 16854e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 16864e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 16874e2b4712SSatish Balay 16884e2b4712SSatish Balay /* forward solve the lower triangular */ 16894e2b4712SSatish Balay idx = 4*(*r++); 1690*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; 1691*f1af5d2fSBarry Smith t[2] = b[2+idx]; t[3] = b[3+idx]; 16924e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 16934e2b4712SSatish Balay v = aa + 16*ai[i]; 16944e2b4712SSatish Balay vi = aj + ai[i]; 16954e2b4712SSatish Balay nz = diag[i] - ai[i]; 16964e2b4712SSatish Balay idx = 4*(*r++); 1697*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 16984e2b4712SSatish Balay while (nz--) { 16994e2b4712SSatish Balay idx = 4*(*vi++); 1700*f1af5d2fSBarry Smith x1 = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx]; 1701*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1702*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1703*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1704*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 17054e2b4712SSatish Balay v += 16; 17064e2b4712SSatish Balay } 17074e2b4712SSatish Balay idx = 4*i; 1708*f1af5d2fSBarry Smith t[idx] = s1;t[1+idx] = s2; 1709*f1af5d2fSBarry Smith t[2+idx] = s3;t[3+idx] = s4; 17104e2b4712SSatish Balay } 17114e2b4712SSatish Balay /* backward solve the upper triangular */ 17124e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 17134e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 17144e2b4712SSatish Balay vi = aj + diag[i] + 1; 17154e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 17164e2b4712SSatish Balay idt = 4*i; 1717*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 1718*f1af5d2fSBarry Smith s3 = t[2+idt];s4 = t[3+idt]; 17194e2b4712SSatish Balay while (nz--) { 17204e2b4712SSatish Balay idx = 4*(*vi++); 1721*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 1722*f1af5d2fSBarry Smith x3 = t[2+idx]; x4 = t[3+idx]; 1723*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1724*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1725*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1726*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 17274e2b4712SSatish Balay v += 16; 17284e2b4712SSatish Balay } 17294e2b4712SSatish Balay idc = 4*(*c--); 17304e2b4712SSatish Balay v = aa + 16*diag[i]; 1731*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4; 1732*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4; 1733*f1af5d2fSBarry Smith x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4; 1734*f1af5d2fSBarry Smith x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4; 17354e2b4712SSatish Balay } 17364e2b4712SSatish Balay 17374e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 17384e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1739e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1740e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 174142015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 17424e2b4712SSatish Balay PetscFunctionReturn(0); 17434e2b4712SSatish Balay } 17440ef38995SBarry Smith 17450ef38995SBarry Smith 17464e2b4712SSatish Balay /* 17474e2b4712SSatish Balay Special case where the matrix was ILU(0) factored in the natural 17484e2b4712SSatish Balay ordering. This eliminates the need for the column and row permutation. 17494e2b4712SSatish Balay */ 17504e2b4712SSatish Balay #undef __FUNC__ 17514e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_4_NaturalOrdering" 17524e2b4712SSatish Balay int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx) 17534e2b4712SSatish Balay { 17544e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 175530d4dcafSBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 175630d4dcafSBarry Smith int ierr,*diag = a->diag; 17573f1db9ecSBarry Smith MatScalar *aa=a->a; 175830d4dcafSBarry Smith Scalar *x,*b; 17594e2b4712SSatish Balay 17604e2b4712SSatish Balay PetscFunctionBegin; 1761e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1762e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 17634e2b4712SSatish Balay 1764aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS) 17652853dc0eSBarry Smith { 17662853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 17672853dc0eSBarry Smith fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w); 17682853dc0eSBarry Smith } 1769aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ) 17702853dc0eSBarry Smith { 17712853dc0eSBarry Smith static Scalar w[2000]; /* very BAD need to fix */ 17722853dc0eSBarry Smith fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w); 17732853dc0eSBarry Smith } 1774aa482453SBarry Smith #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL) 17752853dc0eSBarry Smith fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b); 1776e1293385SBarry Smith #else 177730d4dcafSBarry Smith { 1778*f1af5d2fSBarry Smith Scalar s1,s2,s3,s4,x1,x2,x3,x4; 17793f1db9ecSBarry Smith MatScalar *v; 178030d4dcafSBarry Smith int jdx,idt,idx,nz,*vi,i; 1781e1293385SBarry Smith 17824e2b4712SSatish Balay /* forward solve the lower triangular */ 17834e2b4712SSatish Balay idx = 0; 1784e1293385SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3]; 17854e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 17864e2b4712SSatish Balay v = aa + 16*ai[i]; 17874e2b4712SSatish Balay vi = aj + ai[i]; 17884e2b4712SSatish Balay nz = diag[i] - ai[i]; 1789e1293385SBarry Smith idx += 4; 1790*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx]; 17914e2b4712SSatish Balay while (nz--) { 17924e2b4712SSatish Balay jdx = 4*(*vi++); 17934e2b4712SSatish Balay x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx]; 1794*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1795*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1796*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1797*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 17984e2b4712SSatish Balay v += 16; 17994e2b4712SSatish Balay } 1800*f1af5d2fSBarry Smith x[idx] = s1; 1801*f1af5d2fSBarry Smith x[1+idx] = s2; 1802*f1af5d2fSBarry Smith x[2+idx] = s3; 1803*f1af5d2fSBarry Smith x[3+idx] = s4; 18044e2b4712SSatish Balay } 18054e2b4712SSatish Balay /* backward solve the upper triangular */ 18064e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 18074e2b4712SSatish Balay v = aa + 16*diag[i] + 16; 18084e2b4712SSatish Balay vi = aj + diag[i] + 1; 18094e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 18104e2b4712SSatish Balay idt = 4*i; 1811*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 1812*f1af5d2fSBarry Smith s3 = x[2+idt];s4 = x[3+idt]; 18134e2b4712SSatish Balay while (nz--) { 18144e2b4712SSatish Balay idx = 4*(*vi++); 18154e2b4712SSatish Balay x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; 1816*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1817*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1818*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1819*f1af5d2fSBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 18204e2b4712SSatish Balay v += 16; 18214e2b4712SSatish Balay } 18224e2b4712SSatish Balay v = aa + 16*diag[i]; 1823*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[4]*s2 + v[8]*s3 + v[12]*s4; 1824*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[5]*s2 + v[9]*s3 + v[13]*s4; 1825*f1af5d2fSBarry Smith x[2+idt] = v[2]*s1 + v[6]*s2 + v[10]*s3 + v[14]*s4; 1826*f1af5d2fSBarry Smith x[3+idt] = v[3]*s1 + v[7]*s2 + v[11]*s3 + v[15]*s4; 18274e2b4712SSatish Balay } 182830d4dcafSBarry Smith } 1829e1293385SBarry Smith #endif 18304e2b4712SSatish Balay 1831e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1832e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 183342015d63SBarry Smith PLogFlops(2*16*(a->nz) - 4*a->n); 18344e2b4712SSatish Balay PetscFunctionReturn(0); 18354e2b4712SSatish Balay } 18364e2b4712SSatish Balay 1837667159a5SBarry Smith #undef __FUNC__ 18384e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_3" 18394e2b4712SSatish Balay int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx) 18404e2b4712SSatish Balay { 18414e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 18424e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 18434e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 18444e2b4712SSatish Balay int *diag = a->diag; 18453f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1846*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,x1,x2,x3,*t; 18474e2b4712SSatish Balay 18484e2b4712SSatish Balay PetscFunctionBegin; 1849e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1850e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1851*f1af5d2fSBarry Smith t = a->solve_work; 18524e2b4712SSatish Balay 18534e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 18544e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 18554e2b4712SSatish Balay 18564e2b4712SSatish Balay /* forward solve the lower triangular */ 18574e2b4712SSatish Balay idx = 3*(*r++); 1858*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx]; 18594e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 18604e2b4712SSatish Balay v = aa + 9*ai[i]; 18614e2b4712SSatish Balay vi = aj + ai[i]; 18624e2b4712SSatish Balay nz = diag[i] - ai[i]; 18634e2b4712SSatish Balay idx = 3*(*r++); 1864*f1af5d2fSBarry Smith s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx]; 18654e2b4712SSatish Balay while (nz--) { 18664e2b4712SSatish Balay idx = 3*(*vi++); 1867*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 1868*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1869*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1870*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 18714e2b4712SSatish Balay v += 9; 18724e2b4712SSatish Balay } 18734e2b4712SSatish Balay idx = 3*i; 1874*f1af5d2fSBarry Smith t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3; 18754e2b4712SSatish Balay } 18764e2b4712SSatish Balay /* backward solve the upper triangular */ 18774e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 18784e2b4712SSatish Balay v = aa + 9*diag[i] + 9; 18794e2b4712SSatish Balay vi = aj + diag[i] + 1; 18804e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 18814e2b4712SSatish Balay idt = 3*i; 1882*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt]; 18834e2b4712SSatish Balay while (nz--) { 18844e2b4712SSatish Balay idx = 3*(*vi++); 1885*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; 1886*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1887*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1888*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 18894e2b4712SSatish Balay v += 9; 18904e2b4712SSatish Balay } 18914e2b4712SSatish Balay idc = 3*(*c--); 18924e2b4712SSatish Balay v = aa + 9*diag[i]; 1893*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3; 1894*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3; 1895*f1af5d2fSBarry Smith x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3; 18964e2b4712SSatish Balay } 18974e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 18984e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1899e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1900e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 190142015d63SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 19024e2b4712SSatish Balay PetscFunctionReturn(0); 19034e2b4712SSatish Balay } 19044e2b4712SSatish Balay 190515091d37SBarry Smith /* 190615091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 190715091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 190815091d37SBarry Smith */ 190915091d37SBarry Smith #undef __FUNC__ 191015091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_3_NaturalOrdering" 191115091d37SBarry Smith int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx) 191215091d37SBarry Smith { 191315091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 191415091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 191515091d37SBarry Smith int ierr,*diag = a->diag; 191615091d37SBarry Smith MatScalar *aa=a->a, *v; 1917*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,s3,x1,x2,x3; 191815091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 191915091d37SBarry Smith 192015091d37SBarry Smith PetscFunctionBegin; 192115091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 192215091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 192315091d37SBarry Smith 192415091d37SBarry Smith 192515091d37SBarry Smith /* forward solve the lower triangular */ 192615091d37SBarry Smith idx = 0; 192715091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; 192815091d37SBarry Smith for ( i=1; i<n; i++ ) { 192915091d37SBarry Smith v = aa + 9*ai[i]; 193015091d37SBarry Smith vi = aj + ai[i]; 193115091d37SBarry Smith nz = diag[i] - ai[i]; 193215091d37SBarry Smith idx += 3; 1933*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx]; 193415091d37SBarry Smith while (nz--) { 193515091d37SBarry Smith jdx = 3*(*vi++); 193615091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx]; 1937*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1938*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1939*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 194015091d37SBarry Smith v += 9; 194115091d37SBarry Smith } 1942*f1af5d2fSBarry Smith x[idx] = s1; 1943*f1af5d2fSBarry Smith x[1+idx] = s2; 1944*f1af5d2fSBarry Smith x[2+idx] = s3; 194515091d37SBarry Smith } 194615091d37SBarry Smith /* backward solve the upper triangular */ 194715091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 194815091d37SBarry Smith v = aa + 9*diag[i] + 9; 194915091d37SBarry Smith vi = aj + diag[i] + 1; 195015091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 195115091d37SBarry Smith idt = 3*i; 1952*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 1953*f1af5d2fSBarry Smith s3 = x[2+idt]; 195415091d37SBarry Smith while (nz--) { 195515091d37SBarry Smith idx = 3*(*vi++); 195615091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1957*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1958*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1959*f1af5d2fSBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 196015091d37SBarry Smith v += 9; 196115091d37SBarry Smith } 196215091d37SBarry Smith v = aa + 9*diag[i]; 1963*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3; 1964*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3; 1965*f1af5d2fSBarry Smith x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3; 196615091d37SBarry Smith } 196715091d37SBarry Smith 196815091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 196915091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 197015091d37SBarry Smith PLogFlops(2*9*(a->nz) - 3*a->n); 197115091d37SBarry Smith PetscFunctionReturn(0); 197215091d37SBarry Smith } 197315091d37SBarry Smith 19744e2b4712SSatish Balay #undef __FUNC__ 19754e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_2" 19764e2b4712SSatish Balay int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx) 19774e2b4712SSatish Balay { 19784e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 19794e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 19804e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout; 19814e2b4712SSatish Balay int *diag = a->diag; 19823f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 1983*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,x1,x2,*t; 19844e2b4712SSatish Balay 19854e2b4712SSatish Balay PetscFunctionBegin; 1986e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1987e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1988*f1af5d2fSBarry Smith t = a->solve_work; 19894e2b4712SSatish Balay 19904e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 19914e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 19924e2b4712SSatish Balay 19934e2b4712SSatish Balay /* forward solve the lower triangular */ 19944e2b4712SSatish Balay idx = 2*(*r++); 1995*f1af5d2fSBarry Smith t[0] = b[idx]; t[1] = b[1+idx]; 19964e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 19974e2b4712SSatish Balay v = aa + 4*ai[i]; 19984e2b4712SSatish Balay vi = aj + ai[i]; 19994e2b4712SSatish Balay nz = diag[i] - ai[i]; 20004e2b4712SSatish Balay idx = 2*(*r++); 2001*f1af5d2fSBarry Smith s1 = b[idx]; s2 = b[1+idx]; 20024e2b4712SSatish Balay while (nz--) { 20034e2b4712SSatish Balay idx = 2*(*vi++); 2004*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 2005*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[2]*x2; 2006*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[3]*x2; 20074e2b4712SSatish Balay v += 4; 20084e2b4712SSatish Balay } 20094e2b4712SSatish Balay idx = 2*i; 2010*f1af5d2fSBarry Smith t[idx] = s1; t[1+idx] = s2; 20114e2b4712SSatish Balay } 20124e2b4712SSatish Balay /* backward solve the upper triangular */ 20134e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 20144e2b4712SSatish Balay v = aa + 4*diag[i] + 4; 20154e2b4712SSatish Balay vi = aj + diag[i] + 1; 20164e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 20174e2b4712SSatish Balay idt = 2*i; 2018*f1af5d2fSBarry Smith s1 = t[idt]; s2 = t[1+idt]; 20194e2b4712SSatish Balay while (nz--) { 20204e2b4712SSatish Balay idx = 2*(*vi++); 2021*f1af5d2fSBarry Smith x1 = t[idx]; x2 = t[1+idx]; 2022*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[2]*x2; 2023*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[3]*x2; 20244e2b4712SSatish Balay v += 4; 20254e2b4712SSatish Balay } 20264e2b4712SSatish Balay idc = 2*(*c--); 20274e2b4712SSatish Balay v = aa + 4*diag[i]; 2028*f1af5d2fSBarry Smith x[idc] = t[idt] = v[0]*s1 + v[2]*s2; 2029*f1af5d2fSBarry Smith x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2; 20304e2b4712SSatish Balay } 20314e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 20324e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 2033e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2034e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 203542015d63SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 20364e2b4712SSatish Balay PetscFunctionReturn(0); 20374e2b4712SSatish Balay } 20384e2b4712SSatish Balay 203915091d37SBarry Smith /* 204015091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 204115091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 204215091d37SBarry Smith */ 204315091d37SBarry Smith #undef __FUNC__ 204415091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_2_NaturalOrdering" 204515091d37SBarry Smith int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx) 204615091d37SBarry Smith { 204715091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 204815091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 204915091d37SBarry Smith int ierr,*diag = a->diag; 205015091d37SBarry Smith MatScalar *aa=a->a,*v; 2051*f1af5d2fSBarry Smith Scalar *x,*b,s1,s2,x1,x2; 205215091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 205315091d37SBarry Smith 205415091d37SBarry Smith PetscFunctionBegin; 205515091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 205615091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 205715091d37SBarry Smith 205815091d37SBarry Smith /* forward solve the lower triangular */ 205915091d37SBarry Smith idx = 0; 206015091d37SBarry Smith x[0] = b[0]; x[1] = b[1]; 206115091d37SBarry Smith for ( i=1; i<n; i++ ) { 206215091d37SBarry Smith v = aa + 4*ai[i]; 206315091d37SBarry Smith vi = aj + ai[i]; 206415091d37SBarry Smith nz = diag[i] - ai[i]; 206515091d37SBarry Smith idx += 2; 2066*f1af5d2fSBarry Smith s1 = b[idx];s2 = b[1+idx]; 206715091d37SBarry Smith while (nz--) { 206815091d37SBarry Smith jdx = 2*(*vi++); 206915091d37SBarry Smith x1 = x[jdx];x2 = x[1+jdx]; 2070*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[2]*x2; 2071*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[3]*x2; 207215091d37SBarry Smith v += 4; 207315091d37SBarry Smith } 2074*f1af5d2fSBarry Smith x[idx] = s1; 2075*f1af5d2fSBarry Smith x[1+idx] = s2; 207615091d37SBarry Smith } 207715091d37SBarry Smith /* backward solve the upper triangular */ 207815091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 207915091d37SBarry Smith v = aa + 4*diag[i] + 4; 208015091d37SBarry Smith vi = aj + diag[i] + 1; 208115091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 208215091d37SBarry Smith idt = 2*i; 2083*f1af5d2fSBarry Smith s1 = x[idt]; s2 = x[1+idt]; 208415091d37SBarry Smith while (nz--) { 208515091d37SBarry Smith idx = 2*(*vi++); 208615091d37SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 2087*f1af5d2fSBarry Smith s1 -= v[0]*x1 + v[2]*x2; 2088*f1af5d2fSBarry Smith s2 -= v[1]*x1 + v[3]*x2; 208915091d37SBarry Smith v += 4; 209015091d37SBarry Smith } 209115091d37SBarry Smith v = aa + 4*diag[i]; 2092*f1af5d2fSBarry Smith x[idt] = v[0]*s1 + v[2]*s2; 2093*f1af5d2fSBarry Smith x[1+idt] = v[1]*s1 + v[3]*s2; 209415091d37SBarry Smith } 209515091d37SBarry Smith 209615091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 209715091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 209815091d37SBarry Smith PLogFlops(2*4*(a->nz) - 2*a->n); 209915091d37SBarry Smith PetscFunctionReturn(0); 210015091d37SBarry Smith } 210115091d37SBarry Smith 21024e2b4712SSatish Balay #undef __FUNC__ 21034e2b4712SSatish Balay #define __FUNC__ "MatSolve_SeqBAIJ_1" 21044e2b4712SSatish Balay int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx) 21054e2b4712SSatish Balay { 21064e2b4712SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 21074e2b4712SSatish Balay IS iscol=a->col,isrow=a->row; 21084e2b4712SSatish Balay int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout; 21094e2b4712SSatish Balay int *diag = a->diag; 21103f1db9ecSBarry Smith MatScalar *aa=a->a,*v; 2111*f1af5d2fSBarry Smith Scalar *x,*b,s1,*t; 21124e2b4712SSatish Balay 21134e2b4712SSatish Balay PetscFunctionBegin; 21144e2b4712SSatish Balay if (!n) PetscFunctionReturn(0); 21154e2b4712SSatish Balay 2116e1311b90SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2117e1311b90SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2118*f1af5d2fSBarry Smith t = a->solve_work; 21194e2b4712SSatish Balay 21204e2b4712SSatish Balay ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 21214e2b4712SSatish Balay ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 21224e2b4712SSatish Balay 21234e2b4712SSatish Balay /* forward solve the lower triangular */ 2124*f1af5d2fSBarry Smith t[0] = b[*r++]; 21254e2b4712SSatish Balay for ( i=1; i<n; i++ ) { 21264e2b4712SSatish Balay v = aa + ai[i]; 21274e2b4712SSatish Balay vi = aj + ai[i]; 21284e2b4712SSatish Balay nz = diag[i] - ai[i]; 2129*f1af5d2fSBarry Smith s1 = b[*r++]; 21304e2b4712SSatish Balay while (nz--) { 2131*f1af5d2fSBarry Smith s1 -= (*v++)*t[*vi++]; 21324e2b4712SSatish Balay } 2133*f1af5d2fSBarry Smith t[i] = s1; 21344e2b4712SSatish Balay } 21354e2b4712SSatish Balay /* backward solve the upper triangular */ 21364e2b4712SSatish Balay for ( i=n-1; i>=0; i-- ){ 21374e2b4712SSatish Balay v = aa + diag[i] + 1; 21384e2b4712SSatish Balay vi = aj + diag[i] + 1; 21394e2b4712SSatish Balay nz = ai[i+1] - diag[i] - 1; 2140*f1af5d2fSBarry Smith s1 = t[i]; 21414e2b4712SSatish Balay while (nz--) { 2142*f1af5d2fSBarry Smith s1 -= (*v++)*t[*vi++]; 21434e2b4712SSatish Balay } 2144*f1af5d2fSBarry Smith x[*c--] = t[i] = aa[diag[i]]*s1; 21454e2b4712SSatish Balay } 21464e2b4712SSatish Balay 21474e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 21484e2b4712SSatish Balay ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 2149e1311b90SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2150e1311b90SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 21514e2b4712SSatish Balay PLogFlops(2*1*(a->nz) - a->n); 21524e2b4712SSatish Balay PetscFunctionReturn(0); 21534e2b4712SSatish Balay } 215415091d37SBarry Smith /* 215515091d37SBarry Smith Special case where the matrix was ILU(0) factored in the natural 215615091d37SBarry Smith ordering. This eliminates the need for the column and row permutation. 215715091d37SBarry Smith */ 215815091d37SBarry Smith #undef __FUNC__ 215915091d37SBarry Smith #define __FUNC__ "MatSolve_SeqBAIJ_1_NaturalOrdering" 216015091d37SBarry Smith int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx) 216115091d37SBarry Smith { 216215091d37SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 216315091d37SBarry Smith int n=a->mbs,*ai=a->i,*aj=a->j; 216415091d37SBarry Smith int ierr,*diag = a->diag; 216515091d37SBarry Smith MatScalar *aa=a->a; 216615091d37SBarry Smith Scalar *x,*b; 2167*f1af5d2fSBarry Smith Scalar s1,x1; 216815091d37SBarry Smith MatScalar *v; 216915091d37SBarry Smith int jdx,idt,idx,nz,*vi,i; 217015091d37SBarry Smith 217115091d37SBarry Smith PetscFunctionBegin; 217215091d37SBarry Smith ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 217315091d37SBarry Smith ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 217415091d37SBarry Smith 217515091d37SBarry Smith /* forward solve the lower triangular */ 217615091d37SBarry Smith idx = 0; 217715091d37SBarry Smith x[0] = b[0]; 217815091d37SBarry Smith for ( i=1; i<n; i++ ) { 217915091d37SBarry Smith v = aa + ai[i]; 218015091d37SBarry Smith vi = aj + ai[i]; 218115091d37SBarry Smith nz = diag[i] - ai[i]; 218215091d37SBarry Smith idx += 1; 2183*f1af5d2fSBarry Smith s1 = b[idx]; 218415091d37SBarry Smith while (nz--) { 218515091d37SBarry Smith jdx = *vi++; 218615091d37SBarry Smith x1 = x[jdx]; 2187*f1af5d2fSBarry Smith s1 -= v[0]*x1; 218815091d37SBarry Smith v += 1; 218915091d37SBarry Smith } 2190*f1af5d2fSBarry Smith x[idx] = s1; 219115091d37SBarry Smith } 219215091d37SBarry Smith /* backward solve the upper triangular */ 219315091d37SBarry Smith for ( i=n-1; i>=0; i-- ){ 219415091d37SBarry Smith v = aa + diag[i] + 1; 219515091d37SBarry Smith vi = aj + diag[i] + 1; 219615091d37SBarry Smith nz = ai[i+1] - diag[i] - 1; 219715091d37SBarry Smith idt = i; 2198*f1af5d2fSBarry Smith s1 = x[idt]; 219915091d37SBarry Smith while (nz--) { 220015091d37SBarry Smith idx = *vi++; 220115091d37SBarry Smith x1 = x[idx]; 2202*f1af5d2fSBarry Smith s1 -= v[0]*x1; 220315091d37SBarry Smith v += 1; 220415091d37SBarry Smith } 220515091d37SBarry Smith v = aa + diag[i]; 2206*f1af5d2fSBarry Smith x[idt] = v[0]*s1; 220715091d37SBarry Smith } 220815091d37SBarry Smith ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 220915091d37SBarry Smith ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 221015091d37SBarry Smith PLogFlops(2*(a->nz) - a->n); 221115091d37SBarry Smith PetscFunctionReturn(0); 221215091d37SBarry Smith } 22134e2b4712SSatish Balay 22144e2b4712SSatish Balay /* ----------------------------------------------------------------*/ 22154e2b4712SSatish Balay /* 22164e2b4712SSatish Balay This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 22174e2b4712SSatish Balay except that the data structure of Mat_SeqAIJ is slightly different. 22184e2b4712SSatish Balay Not a good example of code reuse. 22194e2b4712SSatish Balay */ 2220435faa5fSBarry Smith extern int MatMissingDiag_SeqBAIJ(Mat); 2221435faa5fSBarry Smith 22224e2b4712SSatish Balay #undef __FUNC__ 22234e2b4712SSatish Balay #define __FUNC__ "MatILUFactorSymbolic_SeqBAIJ" 2224435faa5fSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 22254e2b4712SSatish Balay { 22264e2b4712SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 22274e2b4712SSatish Balay IS isicol; 22284e2b4712SSatish Balay int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 22294e2b4712SSatish Balay int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 2230335d9088SBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 2231435faa5fSBarry Smith int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2, levels, diagonal_fill; 22324e2b4712SSatish Balay PetscTruth col_identity, row_identity; 2233435faa5fSBarry Smith double f; 22344e2b4712SSatish Balay 22354e2b4712SSatish Balay PetscFunctionBegin; 2236435faa5fSBarry Smith if (info) { 2237435faa5fSBarry Smith f = info->fill; 2238335d9088SBarry Smith levels = (int) info->levels; 2239335d9088SBarry Smith diagonal_fill = (int) info->diagonal_fill; 2240435faa5fSBarry Smith } else { 2241435faa5fSBarry Smith f = 1.0; 2242435faa5fSBarry Smith levels = 0; 2243435faa5fSBarry Smith diagonal_fill = 0; 2244435faa5fSBarry Smith } 2245e51c0b9cSSatish Balay ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 2246e51c0b9cSSatish Balay 22474e2b4712SSatish Balay /* special case that simply copies fill pattern */ 22484e2b4712SSatish Balay PetscValidHeaderSpecific(isrow,IS_COOKIE); 22494e2b4712SSatish Balay PetscValidHeaderSpecific(iscol,IS_COOKIE); 2250667159a5SBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2251667159a5SBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 22524e2b4712SSatish Balay if (levels == 0 && row_identity && col_identity) { 2253e1293385SBarry Smith ierr = MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 22544e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 22554e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 22564e2b4712SSatish Balay if (!b->diag) { 22574e2b4712SSatish Balay ierr = MatMarkDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 22584e2b4712SSatish Balay } 2259435faa5fSBarry Smith ierr = MatMissingDiag_SeqBAIJ(*fact);CHKERRQ(ierr); 22604e2b4712SSatish Balay b->row = isrow; 22614e2b4712SSatish Balay b->col = iscol; 2262e51c0b9cSSatish Balay b->icol = isicol; 22634e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 22644e2b4712SSatish Balay /* 226515091d37SBarry Smith Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 226615091d37SBarry Smith for ILU(0) factorization with natural ordering 22674e2b4712SSatish Balay */ 226815091d37SBarry Smith switch (b->bs) { 226915091d37SBarry Smith case 2: 227015091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 227115091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 227215091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 227315091d37SBarry Smith break; 227415091d37SBarry Smith case 3: 227515091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 227615091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 227715091d37SBarry Smith PLogInfo(A,"UMatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 227815091d37SBarry Smith break; 227915091d37SBarry Smith case 4: 2280f830108cSBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 2281f830108cSBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 228215091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 228315091d37SBarry Smith break; 228415091d37SBarry Smith case 5: 2285667159a5SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 2286667159a5SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 228715091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 228815091d37SBarry Smith break; 228915091d37SBarry Smith case 6: 229015091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 229115091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 229215091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 229315091d37SBarry Smith break; 229415091d37SBarry Smith case 7: 229515091d37SBarry Smith (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 229615091d37SBarry Smith (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 229715091d37SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 229815091d37SBarry Smith break; 22994e2b4712SSatish Balay } 23004e2b4712SSatish Balay PetscFunctionReturn(0); 23014e2b4712SSatish Balay } 23024e2b4712SSatish Balay 23034e2b4712SSatish Balay ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 23044e2b4712SSatish Balay ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 23054e2b4712SSatish Balay 23064e2b4712SSatish Balay /* get new row pointers */ 23074e2b4712SSatish Balay ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 23084e2b4712SSatish Balay ainew[0] = 0; 23094e2b4712SSatish Balay /* don't know how many column pointers are needed so estimate */ 23104e2b4712SSatish Balay jmax = (int) (f*ai[n] + 1); 23114e2b4712SSatish Balay ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 23124e2b4712SSatish Balay /* ajfill is level of fill for each fill entry */ 23134e2b4712SSatish Balay ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 23144e2b4712SSatish Balay /* fill is a linked list of nonzeros in active row */ 23154e2b4712SSatish Balay fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 23164e2b4712SSatish Balay /* im is level for each filled value */ 23174e2b4712SSatish Balay im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 23184e2b4712SSatish Balay /* dloc is location of diagonal in factor */ 23194e2b4712SSatish Balay dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 23204e2b4712SSatish Balay dloc[0] = 0; 23214e2b4712SSatish Balay for ( prow=0; prow<n; prow++ ) { 2322435faa5fSBarry Smith 2323435faa5fSBarry Smith /* copy prow into linked list */ 23244e2b4712SSatish Balay nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 23254e2b4712SSatish Balay if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 23264e2b4712SSatish Balay xi = aj + ai[r[prow]]; 23274e2b4712SSatish Balay fill[n] = n; 2328435faa5fSBarry Smith fill[prow] = -1; /* marker for diagonal entry */ 23294e2b4712SSatish Balay while (nz--) { 23304e2b4712SSatish Balay fm = n; 23314e2b4712SSatish Balay idx = ic[*xi++]; 23324e2b4712SSatish Balay do { 23334e2b4712SSatish Balay m = fm; 23344e2b4712SSatish Balay fm = fill[m]; 23354e2b4712SSatish Balay } while (fm < idx); 23364e2b4712SSatish Balay fill[m] = idx; 23374e2b4712SSatish Balay fill[idx] = fm; 23384e2b4712SSatish Balay im[idx] = 0; 23394e2b4712SSatish Balay } 2340435faa5fSBarry Smith 2341435faa5fSBarry Smith /* make sure diagonal entry is included */ 2342435faa5fSBarry Smith if (diagonal_fill && fill[prow] == -1) { 2343435faa5fSBarry Smith fm = n; 2344435faa5fSBarry Smith while (fill[fm] < prow) fm = fill[fm]; 2345435faa5fSBarry Smith fill[prow] = fill[fm]; /* insert diagonal into linked list */ 2346435faa5fSBarry Smith fill[fm] = prow; 2347435faa5fSBarry Smith im[prow] = 0; 2348435faa5fSBarry Smith nzf++; 2349335d9088SBarry Smith dcount++; 2350435faa5fSBarry Smith } 2351435faa5fSBarry Smith 23524e2b4712SSatish Balay nzi = 0; 23534e2b4712SSatish Balay row = fill[n]; 23544e2b4712SSatish Balay while ( row < prow ) { 23554e2b4712SSatish Balay incrlev = im[row] + 1; 23564e2b4712SSatish Balay nz = dloc[row]; 2357435faa5fSBarry Smith xi = ajnew + ainew[row] + nz + 1; 23584e2b4712SSatish Balay flev = ajfill + ainew[row] + nz + 1; 23594e2b4712SSatish Balay nnz = ainew[row+1] - ainew[row] - nz - 1; 23604e2b4712SSatish Balay fm = row; 23614e2b4712SSatish Balay while (nnz-- > 0) { 23624e2b4712SSatish Balay idx = *xi++; 23634e2b4712SSatish Balay if (*flev + incrlev > levels) { 23644e2b4712SSatish Balay flev++; 23654e2b4712SSatish Balay continue; 23664e2b4712SSatish Balay } 23674e2b4712SSatish Balay do { 23684e2b4712SSatish Balay m = fm; 23694e2b4712SSatish Balay fm = fill[m]; 23704e2b4712SSatish Balay } while (fm < idx); 23714e2b4712SSatish Balay if (fm != idx) { 23724e2b4712SSatish Balay im[idx] = *flev + incrlev; 23734e2b4712SSatish Balay fill[m] = idx; 23744e2b4712SSatish Balay fill[idx] = fm; 23754e2b4712SSatish Balay fm = idx; 23764e2b4712SSatish Balay nzf++; 2377ecf371e4SBarry Smith } else { 23784e2b4712SSatish Balay if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 23794e2b4712SSatish Balay } 23804e2b4712SSatish Balay flev++; 23814e2b4712SSatish Balay } 23824e2b4712SSatish Balay row = fill[row]; 23834e2b4712SSatish Balay nzi++; 23844e2b4712SSatish Balay } 23854e2b4712SSatish Balay /* copy new filled row into permanent storage */ 23864e2b4712SSatish Balay ainew[prow+1] = ainew[prow] + nzf; 23874e2b4712SSatish Balay if (ainew[prow+1] > jmax) { 2388ecf371e4SBarry Smith 2389ecf371e4SBarry Smith /* estimate how much additional space we will need */ 2390ecf371e4SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 2391ecf371e4SBarry Smith /* just double the memory each time */ 2392ecf371e4SBarry Smith int maxadd = jmax; 2393ecf371e4SBarry Smith /* maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); */ 23944e2b4712SSatish Balay if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 23954e2b4712SSatish Balay jmax += maxadd; 2396ecf371e4SBarry Smith 2397ecf371e4SBarry Smith /* allocate a longer ajnew and ajfill */ 23984e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 2399549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));CHKERRQ(ierr); 2400606d414cSSatish Balay ierr = PetscFree(ajnew);CHKERRQ(ierr); 24014e2b4712SSatish Balay ajnew = xi; 24024e2b4712SSatish Balay xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 2403549d3d68SSatish Balay ierr = PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));CHKERRQ(ierr); 2404606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 24054e2b4712SSatish Balay ajfill = xi; 2406ecf371e4SBarry Smith realloc++; /* count how many reallocations are needed */ 24074e2b4712SSatish Balay } 24084e2b4712SSatish Balay xi = ajnew + ainew[prow]; 24094e2b4712SSatish Balay flev = ajfill + ainew[prow]; 24104e2b4712SSatish Balay dloc[prow] = nzi; 24114e2b4712SSatish Balay fm = fill[n]; 24124e2b4712SSatish Balay while (nzf--) { 24134e2b4712SSatish Balay *xi++ = fm; 24144e2b4712SSatish Balay *flev++ = im[fm]; 24154e2b4712SSatish Balay fm = fill[fm]; 24164e2b4712SSatish Balay } 2417435faa5fSBarry Smith /* make sure row has diagonal entry */ 2418435faa5fSBarry Smith if (ajnew[ainew[prow]+dloc[prow]] != prow) { 2419435faa5fSBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 2420435faa5fSBarry Smith try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 2421435faa5fSBarry Smith } 24224e2b4712SSatish Balay } 2423606d414cSSatish Balay ierr = PetscFree(ajfill);CHKERRQ(ierr); 24244e2b4712SSatish Balay ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 24254e2b4712SSatish Balay ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2426606d414cSSatish Balay ierr = PetscFree(fill);CHKERRQ(ierr); 2427606d414cSSatish Balay ierr = PetscFree(im);CHKERRQ(ierr); 24284e2b4712SSatish Balay 24294e2b4712SSatish Balay { 24304e2b4712SSatish Balay double af = ((double)ainew[n])/((double)ai[n]); 2431981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 24324e2b4712SSatish Balay realloc,f,af); 2433981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use \n",af); 2434981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);\n",af); 2435981c4779SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.\n"); 2436335d9088SBarry Smith if (diagonal_fill) { 2437335d9088SBarry Smith PLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount); 2438335d9088SBarry Smith } 24394e2b4712SSatish Balay } 24404e2b4712SSatish Balay 24414e2b4712SSatish Balay /* put together the new matrix */ 24424e2b4712SSatish Balay ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 2443fa6007c9SSatish Balay PLogObjectParent(*fact,isicol); 24444e2b4712SSatish Balay b = (Mat_SeqBAIJ *) (*fact)->data; 2445606d414cSSatish Balay ierr = PetscFree(b->imax);CHKERRQ(ierr); 24464e2b4712SSatish Balay b->singlemalloc = 0; 24473f1db9ecSBarry Smith len = bs2*ainew[n]*sizeof(MatScalar); 24484e2b4712SSatish Balay /* the next line frees the default space generated by the Create() */ 2449606d414cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); 2450606d414cSSatish Balay ierr = PetscFree(b->ilen);CHKERRQ(ierr); 24513f1db9ecSBarry Smith b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 24524e2b4712SSatish Balay b->j = ajnew; 24534e2b4712SSatish Balay b->i = ainew; 24544e2b4712SSatish Balay for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 24554e2b4712SSatish Balay b->diag = dloc; 24564e2b4712SSatish Balay b->ilen = 0; 24574e2b4712SSatish Balay b->imax = 0; 24584e2b4712SSatish Balay b->row = isrow; 24594e2b4712SSatish Balay b->col = iscol; 2460e51c0b9cSSatish Balay b->icol = isicol; 24614e2b4712SSatish Balay b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 24624e2b4712SSatish Balay /* In b structure: Free imax, ilen, old a, old j. 24634e2b4712SSatish Balay Allocate dloc, solve_work, new a, new j */ 24644e2b4712SSatish Balay PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar)); 24654e2b4712SSatish Balay b->maxnz = b->nz = ainew[n]; 24664e2b4712SSatish Balay (*fact)->factor = FACTOR_LU; 24674e2b4712SSatish Balay 24684e2b4712SSatish Balay (*fact)->info.factor_mallocs = realloc; 24694e2b4712SSatish Balay (*fact)->info.fill_ratio_given = f; 24704e2b4712SSatish Balay (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 24714e2b4712SSatish Balay 24724e2b4712SSatish Balay PetscFunctionReturn(0); 24734e2b4712SSatish Balay } 24744e2b4712SSatish Balay 24754e2b4712SSatish Balay 24764e2b4712SSatish Balay 24774e2b4712SSatish Balay 2478