1*4b9ad928SBarry Smith /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/ 2*4b9ad928SBarry Smith 3*4b9ad928SBarry Smith /* 4*4b9ad928SBarry Smith Include files needed for the PBJacobi preconditioner: 5*4b9ad928SBarry Smith pcimpl.h - private include file intended for use by all preconditioners 6*4b9ad928SBarry Smith */ 7*4b9ad928SBarry Smith 8*4b9ad928SBarry Smith #include "src/ksp/pc/pcimpl.h" /*I "petscpc.h" I*/ 9*4b9ad928SBarry Smith 10*4b9ad928SBarry Smith /* 11*4b9ad928SBarry Smith Private context (data structure) for the PBJacobi preconditioner. 12*4b9ad928SBarry Smith */ 13*4b9ad928SBarry Smith typedef struct { 14*4b9ad928SBarry Smith PetscScalar *diag; 15*4b9ad928SBarry Smith int bs,mbs; 16*4b9ad928SBarry Smith } PC_PBJacobi; 17*4b9ad928SBarry Smith 18*4b9ad928SBarry Smith /* 19*4b9ad928SBarry Smith Currently only implemented for baij matrices and directly access baij 20*4b9ad928SBarry Smith data structures. 21*4b9ad928SBarry Smith */ 22*4b9ad928SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h" 23*4b9ad928SBarry Smith #include "src/inline/ilu.h" 24*4b9ad928SBarry Smith 25*4b9ad928SBarry Smith #undef __FUNCT__ 26*4b9ad928SBarry Smith #define __FUNCT__ "PCApply_PBJacobi_2" 27*4b9ad928SBarry Smith static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 28*4b9ad928SBarry Smith { 29*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 30*4b9ad928SBarry Smith int ierr,i,m = jac->mbs; 31*4b9ad928SBarry Smith PetscScalar *diag = jac->diag,x0,x1,*xx,*yy; 32*4b9ad928SBarry Smith 33*4b9ad928SBarry Smith PetscFunctionBegin; 34*4b9ad928SBarry Smith ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 35*4b9ad928SBarry Smith ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 36*4b9ad928SBarry Smith for (i=0; i<m; i++) { 37*4b9ad928SBarry Smith x0 = xx[2*i]; x1 = xx[2*i+1]; 38*4b9ad928SBarry Smith yy[2*i] = diag[0]*x0 + diag[2]*x1; 39*4b9ad928SBarry Smith yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 40*4b9ad928SBarry Smith diag += 4; 41*4b9ad928SBarry Smith } 42*4b9ad928SBarry Smith ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 43*4b9ad928SBarry Smith ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 44*4b9ad928SBarry Smith PetscLogFlops(6*m); 45*4b9ad928SBarry Smith PetscFunctionReturn(0); 46*4b9ad928SBarry Smith } 47*4b9ad928SBarry Smith #undef __FUNCT__ 48*4b9ad928SBarry Smith #define __FUNCT__ "PCApply_PBJacobi_3" 49*4b9ad928SBarry Smith static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 50*4b9ad928SBarry Smith { 51*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 52*4b9ad928SBarry Smith int ierr,i,m = jac->mbs; 53*4b9ad928SBarry Smith PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy; 54*4b9ad928SBarry Smith 55*4b9ad928SBarry Smith PetscFunctionBegin; 56*4b9ad928SBarry Smith ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 57*4b9ad928SBarry Smith ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 58*4b9ad928SBarry Smith for (i=0; i<m; i++) { 59*4b9ad928SBarry Smith x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 60*4b9ad928SBarry Smith yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 61*4b9ad928SBarry Smith yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 62*4b9ad928SBarry Smith yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 63*4b9ad928SBarry Smith diag += 9; 64*4b9ad928SBarry Smith } 65*4b9ad928SBarry Smith ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 66*4b9ad928SBarry Smith ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 67*4b9ad928SBarry Smith PetscLogFlops(15*m); 68*4b9ad928SBarry Smith PetscFunctionReturn(0); 69*4b9ad928SBarry Smith } 70*4b9ad928SBarry Smith #undef __FUNCT__ 71*4b9ad928SBarry Smith #define __FUNCT__ "PCApply_PBJacobi_4" 72*4b9ad928SBarry Smith static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 73*4b9ad928SBarry Smith { 74*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 75*4b9ad928SBarry Smith int ierr,i,m = jac->mbs; 76*4b9ad928SBarry Smith PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy; 77*4b9ad928SBarry Smith 78*4b9ad928SBarry Smith PetscFunctionBegin; 79*4b9ad928SBarry Smith ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 80*4b9ad928SBarry Smith ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 81*4b9ad928SBarry Smith for (i=0; i<m; i++) { 82*4b9ad928SBarry Smith x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 83*4b9ad928SBarry Smith yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 84*4b9ad928SBarry Smith yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 85*4b9ad928SBarry Smith yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 86*4b9ad928SBarry Smith yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 87*4b9ad928SBarry Smith diag += 16; 88*4b9ad928SBarry Smith } 89*4b9ad928SBarry Smith ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 90*4b9ad928SBarry Smith ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 91*4b9ad928SBarry Smith PetscLogFlops(28*m); 92*4b9ad928SBarry Smith PetscFunctionReturn(0); 93*4b9ad928SBarry Smith } 94*4b9ad928SBarry Smith #undef __FUNCT__ 95*4b9ad928SBarry Smith #define __FUNCT__ "PCApply_PBJacobi_5" 96*4b9ad928SBarry Smith static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 97*4b9ad928SBarry Smith { 98*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 99*4b9ad928SBarry Smith int ierr,i,m = jac->mbs; 100*4b9ad928SBarry Smith PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy; 101*4b9ad928SBarry Smith 102*4b9ad928SBarry Smith PetscFunctionBegin; 103*4b9ad928SBarry Smith ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 104*4b9ad928SBarry Smith ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 105*4b9ad928SBarry Smith for (i=0; i<m; i++) { 106*4b9ad928SBarry Smith x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 107*4b9ad928SBarry Smith yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 108*4b9ad928SBarry Smith yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 109*4b9ad928SBarry Smith yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 110*4b9ad928SBarry Smith yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 111*4b9ad928SBarry Smith yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 112*4b9ad928SBarry Smith diag += 25; 113*4b9ad928SBarry Smith } 114*4b9ad928SBarry Smith ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 115*4b9ad928SBarry Smith ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 116*4b9ad928SBarry Smith PetscLogFlops(45*m); 117*4b9ad928SBarry Smith PetscFunctionReturn(0); 118*4b9ad928SBarry Smith } 119*4b9ad928SBarry Smith /* -------------------------------------------------------------------------- */ 120*4b9ad928SBarry Smith #undef __FUNCT__ 121*4b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_PBJacobi" 122*4b9ad928SBarry Smith static int PCSetUp_PBJacobi(PC pc) 123*4b9ad928SBarry Smith { 124*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 125*4b9ad928SBarry Smith int ierr,i,*diag_offset,bs2; 126*4b9ad928SBarry Smith PetscTruth seqbaij,mpibaij; 127*4b9ad928SBarry Smith Mat A = pc->pmat; 128*4b9ad928SBarry Smith PetscScalar *diag,*odiag,*v; 129*4b9ad928SBarry Smith Mat_SeqBAIJ *a; 130*4b9ad928SBarry Smith 131*4b9ad928SBarry Smith PetscFunctionBegin; 132*4b9ad928SBarry Smith ierr = PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);CHKERRQ(ierr); 133*4b9ad928SBarry Smith ierr = PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);CHKERRQ(ierr); 134*4b9ad928SBarry Smith if (!seqbaij && !mpibaij) { 135*4b9ad928SBarry Smith SETERRQ(1,"Currently only supports BAIJ matrices"); 136*4b9ad928SBarry Smith } 137*4b9ad928SBarry Smith if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A; 138*4b9ad928SBarry Smith if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage"); 139*4b9ad928SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 140*4b9ad928SBarry Smith a = (Mat_SeqBAIJ*)A->data; 141*4b9ad928SBarry Smith bs2 = a->bs*a->bs; 142*4b9ad928SBarry Smith diag_offset = a->diag; 143*4b9ad928SBarry Smith v = a->a; 144*4b9ad928SBarry Smith ierr = PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);CHKERRQ(ierr); 145*4b9ad928SBarry Smith PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar)); 146*4b9ad928SBarry Smith jac->diag = diag; 147*4b9ad928SBarry Smith jac->bs = a->bs; 148*4b9ad928SBarry Smith jac->mbs = a->mbs; 149*4b9ad928SBarry Smith 150*4b9ad928SBarry Smith /* factor and invert each block */ 151*4b9ad928SBarry Smith switch (a->bs){ 152*4b9ad928SBarry Smith case 2: 153*4b9ad928SBarry Smith for (i=0; i<a->mbs; i++) { 154*4b9ad928SBarry Smith odiag = v + 4*diag_offset[i]; 155*4b9ad928SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 156*4b9ad928SBarry Smith ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 157*4b9ad928SBarry Smith diag += 4; 158*4b9ad928SBarry Smith } 159*4b9ad928SBarry Smith pc->ops->apply = PCApply_PBJacobi_2; 160*4b9ad928SBarry Smith break; 161*4b9ad928SBarry Smith case 3: 162*4b9ad928SBarry Smith for (i=0; i<a->mbs; i++) { 163*4b9ad928SBarry Smith odiag = v + 9*diag_offset[i]; 164*4b9ad928SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 165*4b9ad928SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 166*4b9ad928SBarry Smith diag[8] = odiag[8]; diag[9] = odiag[9]; 167*4b9ad928SBarry Smith ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 168*4b9ad928SBarry Smith diag += 9; 169*4b9ad928SBarry Smith } 170*4b9ad928SBarry Smith pc->ops->apply = PCApply_PBJacobi_3; 171*4b9ad928SBarry Smith break; 172*4b9ad928SBarry Smith case 4: 173*4b9ad928SBarry Smith for (i=0; i<a->mbs; i++) { 174*4b9ad928SBarry Smith odiag = v + 16*diag_offset[i]; 175*4b9ad928SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 176*4b9ad928SBarry Smith ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 177*4b9ad928SBarry Smith diag += 16; 178*4b9ad928SBarry Smith } 179*4b9ad928SBarry Smith pc->ops->apply = PCApply_PBJacobi_4; 180*4b9ad928SBarry Smith break; 181*4b9ad928SBarry Smith case 5: 182*4b9ad928SBarry Smith for (i=0; i<a->mbs; i++) { 183*4b9ad928SBarry Smith odiag = v + 25*diag_offset[i]; 184*4b9ad928SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 185*4b9ad928SBarry Smith ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 186*4b9ad928SBarry Smith diag += 25; 187*4b9ad928SBarry Smith } 188*4b9ad928SBarry Smith pc->ops->apply = PCApply_PBJacobi_5; 189*4b9ad928SBarry Smith break; 190*4b9ad928SBarry Smith default: 191*4b9ad928SBarry Smith SETERRQ1(1,"not supported for block size %d",a->bs); 192*4b9ad928SBarry Smith } 193*4b9ad928SBarry Smith 194*4b9ad928SBarry Smith PetscFunctionReturn(0); 195*4b9ad928SBarry Smith } 196*4b9ad928SBarry Smith /* -------------------------------------------------------------------------- */ 197*4b9ad928SBarry Smith #undef __FUNCT__ 198*4b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_PBJacobi" 199*4b9ad928SBarry Smith static int PCDestroy_PBJacobi(PC pc) 200*4b9ad928SBarry Smith { 201*4b9ad928SBarry Smith PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 202*4b9ad928SBarry Smith int ierr; 203*4b9ad928SBarry Smith 204*4b9ad928SBarry Smith PetscFunctionBegin; 205*4b9ad928SBarry Smith if (jac->diag) {ierr = PetscFree(jac->diag);CHKERRQ(ierr);} 206*4b9ad928SBarry Smith 207*4b9ad928SBarry Smith /* 208*4b9ad928SBarry Smith Free the private data structure that was hanging off the PC 209*4b9ad928SBarry Smith */ 210*4b9ad928SBarry Smith ierr = PetscFree(jac);CHKERRQ(ierr); 211*4b9ad928SBarry Smith PetscFunctionReturn(0); 212*4b9ad928SBarry Smith } 213*4b9ad928SBarry Smith /* -------------------------------------------------------------------------- */ 214*4b9ad928SBarry Smith EXTERN_C_BEGIN 215*4b9ad928SBarry Smith #undef __FUNCT__ 216*4b9ad928SBarry Smith #define __FUNCT__ "PCCreate_PBJacobi" 217*4b9ad928SBarry Smith int PCCreate_PBJacobi(PC pc) 218*4b9ad928SBarry Smith { 219*4b9ad928SBarry Smith PC_PBJacobi *jac; 220*4b9ad928SBarry Smith int ierr; 221*4b9ad928SBarry Smith 222*4b9ad928SBarry Smith PetscFunctionBegin; 223*4b9ad928SBarry Smith 224*4b9ad928SBarry Smith /* 225*4b9ad928SBarry Smith Creates the private data structure for this preconditioner and 226*4b9ad928SBarry Smith attach it to the PC object. 227*4b9ad928SBarry Smith */ 228*4b9ad928SBarry Smith ierr = PetscNew(PC_PBJacobi,&jac);CHKERRQ(ierr); 229*4b9ad928SBarry Smith pc->data = (void*)jac; 230*4b9ad928SBarry Smith 231*4b9ad928SBarry Smith /* 232*4b9ad928SBarry Smith Logs the memory usage; this is not needed but allows PETSc to 233*4b9ad928SBarry Smith monitor how much memory is being used for various purposes. 234*4b9ad928SBarry Smith */ 235*4b9ad928SBarry Smith PetscLogObjectMemory(pc,sizeof(PC_PBJacobi)); 236*4b9ad928SBarry Smith 237*4b9ad928SBarry Smith /* 238*4b9ad928SBarry Smith Initialize the pointers to vectors to ZERO; these will be used to store 239*4b9ad928SBarry Smith diagonal entries of the matrix for fast preconditioner application. 240*4b9ad928SBarry Smith */ 241*4b9ad928SBarry Smith jac->diag = 0; 242*4b9ad928SBarry Smith 243*4b9ad928SBarry Smith /* 244*4b9ad928SBarry Smith Set the pointers for the functions that are provided above. 245*4b9ad928SBarry Smith Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 246*4b9ad928SBarry Smith are called, they will automatically call these functions. Note we 247*4b9ad928SBarry Smith choose not to provide a couple of these functions since they are 248*4b9ad928SBarry Smith not needed. 249*4b9ad928SBarry Smith */ 250*4b9ad928SBarry Smith pc->ops->apply = 0; /*set depending on the block size */ 251*4b9ad928SBarry Smith pc->ops->applytranspose = 0; 252*4b9ad928SBarry Smith pc->ops->setup = PCSetUp_PBJacobi; 253*4b9ad928SBarry Smith pc->ops->destroy = PCDestroy_PBJacobi; 254*4b9ad928SBarry Smith pc->ops->setfromoptions = 0; 255*4b9ad928SBarry Smith pc->ops->view = 0; 256*4b9ad928SBarry Smith pc->ops->applyrichardson = 0; 257*4b9ad928SBarry Smith pc->ops->applysymmetricleft = 0; 258*4b9ad928SBarry Smith pc->ops->applysymmetricright = 0; 259*4b9ad928SBarry Smith PetscFunctionReturn(0); 260*4b9ad928SBarry Smith } 261*4b9ad928SBarry Smith EXTERN_C_END 262*4b9ad928SBarry Smith 263*4b9ad928SBarry Smith 264