1 /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/ 2 3 /* 4 Include files needed for the PBJacobi preconditioner: 5 pcimpl.h - private include file intended for use by all preconditioners 6 */ 7 8 #include "src/ksp/pc/pcimpl.h" /*I "petscpc.h" I*/ 9 10 /* 11 Private context (data structure) for the PBJacobi preconditioner. 12 */ 13 typedef struct { 14 PetscScalar *diag; 15 int bs,mbs; 16 } PC_PBJacobi; 17 18 /* 19 Currently only implemented for baij matrices and directly access baij 20 data structures. 21 */ 22 #include "src/mat/impls/baij/mpi/mpibaij.h" 23 #include "src/inline/ilu.h" 24 25 #undef __FUNCT__ 26 #define __FUNCT__ "PCApply_PBJacobi_2" 27 static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y) 28 { 29 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 30 int ierr,i,m = jac->mbs; 31 PetscScalar *diag = jac->diag,x0,x1,*xx,*yy; 32 33 PetscFunctionBegin; 34 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 35 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 36 for (i=0; i<m; i++) { 37 x0 = xx[2*i]; x1 = xx[2*i+1]; 38 yy[2*i] = diag[0]*x0 + diag[2]*x1; 39 yy[2*i+1] = diag[1]*x0 + diag[3]*x1; 40 diag += 4; 41 } 42 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 43 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 44 PetscLogFlops(6*m); 45 PetscFunctionReturn(0); 46 } 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCApply_PBJacobi_3" 49 static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y) 50 { 51 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 52 int ierr,i,m = jac->mbs; 53 PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy; 54 55 PetscFunctionBegin; 56 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 57 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 58 for (i=0; i<m; i++) { 59 x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2]; 60 yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2; 61 yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2; 62 yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2; 63 diag += 9; 64 } 65 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 66 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 67 PetscLogFlops(15*m); 68 PetscFunctionReturn(0); 69 } 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCApply_PBJacobi_4" 72 static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y) 73 { 74 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 75 int ierr,i,m = jac->mbs; 76 PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy; 77 78 PetscFunctionBegin; 79 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 80 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 81 for (i=0; i<m; i++) { 82 x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3]; 83 yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3; 84 yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3; 85 yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3; 86 yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3; 87 diag += 16; 88 } 89 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 90 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 91 PetscLogFlops(28*m); 92 PetscFunctionReturn(0); 93 } 94 #undef __FUNCT__ 95 #define __FUNCT__ "PCApply_PBJacobi_5" 96 static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y) 97 { 98 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 99 int ierr,i,m = jac->mbs; 100 PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy; 101 102 PetscFunctionBegin; 103 ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 104 ierr = VecGetArray(y,&yy);CHKERRQ(ierr); 105 for (i=0; i<m; i++) { 106 x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4]; 107 yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4; 108 yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4; 109 yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4; 110 yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4; 111 yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4; 112 diag += 25; 113 } 114 ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 115 ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr); 116 PetscLogFlops(45*m); 117 PetscFunctionReturn(0); 118 } 119 /* -------------------------------------------------------------------------- */ 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCSetUp_PBJacobi" 122 static int PCSetUp_PBJacobi(PC pc) 123 { 124 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 125 int ierr,i,*diag_offset,bs2; 126 PetscTruth seqbaij,mpibaij; 127 Mat A = pc->pmat; 128 PetscScalar *diag,*odiag,*v; 129 Mat_SeqBAIJ *a; 130 131 PetscFunctionBegin; 132 ierr = PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);CHKERRQ(ierr); 133 ierr = PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);CHKERRQ(ierr); 134 if (!seqbaij && !mpibaij) { 135 SETERRQ(1,"Currently only supports BAIJ matrices"); 136 } 137 if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A; 138 if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage"); 139 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 140 a = (Mat_SeqBAIJ*)A->data; 141 bs2 = a->bs*a->bs; 142 diag_offset = a->diag; 143 v = a->a; 144 ierr = PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);CHKERRQ(ierr); 145 PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar)); 146 jac->diag = diag; 147 jac->bs = a->bs; 148 jac->mbs = a->mbs; 149 150 /* factor and invert each block */ 151 switch (a->bs){ 152 case 2: 153 for (i=0; i<a->mbs; i++) { 154 odiag = v + 4*diag_offset[i]; 155 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 156 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 157 diag += 4; 158 } 159 pc->ops->apply = PCApply_PBJacobi_2; 160 break; 161 case 3: 162 for (i=0; i<a->mbs; i++) { 163 odiag = v + 9*diag_offset[i]; 164 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 165 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 166 diag[8] = odiag[8]; diag[9] = odiag[9]; 167 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 168 diag += 9; 169 } 170 pc->ops->apply = PCApply_PBJacobi_3; 171 break; 172 case 4: 173 for (i=0; i<a->mbs; i++) { 174 odiag = v + 16*diag_offset[i]; 175 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 176 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 177 diag += 16; 178 } 179 pc->ops->apply = PCApply_PBJacobi_4; 180 break; 181 case 5: 182 for (i=0; i<a->mbs; i++) { 183 odiag = v + 25*diag_offset[i]; 184 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 185 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 186 diag += 25; 187 } 188 pc->ops->apply = PCApply_PBJacobi_5; 189 break; 190 default: 191 SETERRQ1(1,"not supported for block size %d",a->bs); 192 } 193 194 PetscFunctionReturn(0); 195 } 196 /* -------------------------------------------------------------------------- */ 197 #undef __FUNCT__ 198 #define __FUNCT__ "PCDestroy_PBJacobi" 199 static int PCDestroy_PBJacobi(PC pc) 200 { 201 PC_PBJacobi *jac = (PC_PBJacobi*)pc->data; 202 int ierr; 203 204 PetscFunctionBegin; 205 if (jac->diag) {ierr = PetscFree(jac->diag);CHKERRQ(ierr);} 206 207 /* 208 Free the private data structure that was hanging off the PC 209 */ 210 ierr = PetscFree(jac);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 /* -------------------------------------------------------------------------- */ 214 EXTERN_C_BEGIN 215 #undef __FUNCT__ 216 #define __FUNCT__ "PCCreate_PBJacobi" 217 int PCCreate_PBJacobi(PC pc) 218 { 219 PC_PBJacobi *jac; 220 int ierr; 221 222 PetscFunctionBegin; 223 224 /* 225 Creates the private data structure for this preconditioner and 226 attach it to the PC object. 227 */ 228 ierr = PetscNew(PC_PBJacobi,&jac);CHKERRQ(ierr); 229 pc->data = (void*)jac; 230 231 /* 232 Logs the memory usage; this is not needed but allows PETSc to 233 monitor how much memory is being used for various purposes. 234 */ 235 PetscLogObjectMemory(pc,sizeof(PC_PBJacobi)); 236 237 /* 238 Initialize the pointers to vectors to ZERO; these will be used to store 239 diagonal entries of the matrix for fast preconditioner application. 240 */ 241 jac->diag = 0; 242 243 /* 244 Set the pointers for the functions that are provided above. 245 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 246 are called, they will automatically call these functions. Note we 247 choose not to provide a couple of these functions since they are 248 not needed. 249 */ 250 pc->ops->apply = 0; /*set depending on the block size */ 251 pc->ops->applytranspose = 0; 252 pc->ops->setup = PCSetUp_PBJacobi; 253 pc->ops->destroy = PCDestroy_PBJacobi; 254 pc->ops->setfromoptions = 0; 255 pc->ops->view = 0; 256 pc->ops->applyrichardson = 0; 257 pc->ops->applysymmetricleft = 0; 258 pc->ops->applysymmetricright = 0; 259 PetscFunctionReturn(0); 260 } 261 EXTERN_C_END 262 263 264