1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 106 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 108 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 110 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 113 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 117 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 118 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 119 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 120 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 121 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 122 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 #undef __FUNCT__ 127 #define __FUNCT__ "PCBDDCCreateWorkVectors" 128 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 129 { 130 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 131 PC_IS *pcis = (PC_IS*)pc->data; 132 VecType impVecType; 133 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 134 PetscErrorCode ierr; 135 136 PetscFunctionBegin; 137 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 138 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 139 local_primal_size = n_constraints+n_vertices; 140 n_R = pcis->n-n_vertices; 141 /* local work vectors */ 142 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 143 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 149 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 150 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 151 if (n_constraints) { 152 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 153 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 154 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 161 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 162 { 163 PetscErrorCode ierr; 164 /* pointers to pcis and pcbddc */ 165 PC_IS* pcis = (PC_IS*)pc->data; 166 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 167 /* submatrices of local problem */ 168 Mat A_RV,A_VR,A_VV; 169 /* working matrices */ 170 Mat M1,M2,M3,C_CR; 171 /* working vectors */ 172 Vec vec1_C,vec2_C,vec1_V,vec2_V; 173 /* additional working stuff */ 174 IS is_aux; 175 ISLocalToGlobalMapping BtoNmap; 176 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 177 const PetscScalar *array,*row_cmat_values; 178 const PetscInt *row_cmat_indices,*idx_R_local; 179 PetscInt *vertices,*idx_V_B,*auxindices; 180 PetscInt n_vertices,n_constraints,size_of_constraint; 181 PetscInt i,j,n_R,n_D,n_B; 182 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 183 /* Vector and matrix types */ 184 VecType impVecType; 185 MatType impMatType; 186 /* some shortcuts to scalars */ 187 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 188 /* for debugging purposes */ 189 PetscReal *coarsefunctions_errors,*constraints_errors; 190 191 PetscFunctionBegin; 192 /* get number of vertices and their local indices */ 193 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 194 n_constraints = pcbddc->local_primal_size-n_vertices; 195 /* Set Non-overlapping dimensions */ 196 n_B = pcis->n_B; n_D = pcis->n - n_B; 197 n_R = pcis->n-n_vertices; 198 199 /* Set types for local objects needed by BDDC precondtioner */ 200 impMatType = MATSEQDENSE; 201 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 202 203 /* Allocating some extra storage just to be safe */ 204 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 205 for (i=0;i<pcis->n;i++) auxindices[i]=i; 206 207 /* vertices in boundary numbering */ 208 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 209 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 210 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 211 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 212 if (i != n_vertices) { 213 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 214 } 215 216 /* some work vectors on vertices and/or constraints */ 217 if (n_vertices) { 218 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 219 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 220 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 221 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 222 } 223 if (n_constraints) { 224 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 225 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 226 } 227 228 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 229 if (n_constraints) { 230 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 231 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 232 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 233 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 234 235 /* Extract constraints on R nodes: C_{CR} */ 236 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 237 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 238 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 239 240 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 241 for (i=0;i<n_constraints;i++) { 242 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 243 /* Get row of constraint matrix in R numbering */ 244 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 245 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 246 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 247 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 248 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 249 /* Solve for row of constraint matrix in R numbering */ 250 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 251 /* Set values in local_auxmat2 */ 252 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 253 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 254 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 255 } 256 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 258 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 259 260 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 261 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 262 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 263 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 264 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 265 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 266 ierr = MatSetUp(M1);CHKERRQ(ierr); 267 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 268 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 269 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 270 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 274 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 275 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 276 } 277 278 /* Get submatrices from subdomain matrix */ 279 if (n_vertices) { 280 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 281 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 283 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 284 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 285 } 286 287 /* Matrix of coarse basis functions (local) */ 288 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 289 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 290 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 291 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 292 if (pcbddc->switch_static || pcbddc->dbg_flag) { 293 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 294 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 295 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 296 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 297 } 298 299 if (pcbddc->dbg_flag) { 300 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 301 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 303 } 304 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 305 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 306 307 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 308 309 /* vertices */ 310 for (i=0;i<n_vertices;i++) { 311 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 312 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 313 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 314 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 315 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 316 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 318 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 319 if (n_constraints) { 320 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 321 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 322 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 323 } 324 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 325 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 326 327 /* Set values in coarse basis function and subdomain part of coarse_mat */ 328 /* coarse basis functions */ 329 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 330 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 331 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 333 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 334 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 335 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 336 if (pcbddc->switch_static || pcbddc->dbg_flag) { 337 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 338 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 340 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 342 } 343 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 344 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 345 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 347 if (n_constraints) { 348 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 349 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 350 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 351 } 352 353 /* check */ 354 if (pcbddc->dbg_flag) { 355 /* assemble subdomain vector on local nodes */ 356 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 357 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 358 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 360 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 362 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 363 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 364 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 365 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 366 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 367 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 368 if (n_constraints) { 369 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 370 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 371 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 372 } 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 376 /* check saddle point solution */ 377 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 378 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 379 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 380 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 381 /* shift by the identity matrix */ 382 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 385 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 386 } 387 } 388 389 /* constraints */ 390 for (i=0;i<n_constraints;i++) { 391 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 392 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 393 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 394 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 395 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 396 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 397 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 399 if (n_vertices) { 400 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 401 } 402 /* Set values in coarse basis function and subdomain part of coarse_mat */ 403 /* coarse basis functions */ 404 j = i+n_vertices; /* don't touch this! */ 405 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 406 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 411 if (pcbddc->switch_static || pcbddc->dbg_flag) { 412 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 414 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 415 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 417 } 418 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 419 if (n_vertices) { 420 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 421 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 423 } 424 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 425 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 427 428 if (pcbddc->dbg_flag) { 429 /* assemble subdomain vector on nodes */ 430 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 431 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 434 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 436 /* assemble subdomain vector of lagrange multipliers */ 437 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 438 if (n_vertices) { 439 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 442 } 443 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 444 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 445 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 449 /* check saddle point solution */ 450 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 451 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 452 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 453 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 454 /* shift by the identity matrix */ 455 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 459 } 460 } 461 /* call assembling routines for local coarse basis */ 462 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 if (pcbddc->switch_static || pcbddc->dbg_flag) { 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 } 468 469 /* compute other basis functions for non-symmetric problems */ 470 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 471 if (!setsym || (setsym && !issym)) { 472 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 473 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 474 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 475 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 478 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 479 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 480 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 } 482 for (i=0;i<pcbddc->local_primal_size;i++) { 483 if (n_constraints) { 484 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 485 for (j=0;j<n_constraints;j++) { 486 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 487 } 488 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 489 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 490 } 491 if (i<n_vertices) { 492 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 493 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 496 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 497 if (n_constraints) { 498 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 499 } 500 } else { 501 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 504 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 505 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 508 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 510 if (i<n_vertices) { 511 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 512 } 513 if (pcbddc->switch_static || pcbddc->dbg_flag) { 514 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 515 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 516 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 517 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 518 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 519 } 520 521 if (pcbddc->dbg_flag) { 522 /* assemble subdomain vector on nodes */ 523 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 524 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 525 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 527 if (i<n_vertices) { 528 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 532 /* assemble subdomain vector of lagrange multipliers */ 533 for (j=0;j<pcbddc->local_primal_size;j++) { 534 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 535 } 536 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 538 /* check saddle point solution */ 539 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 540 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 541 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 542 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 543 /* shift by the identity matrix */ 544 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 545 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 546 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 547 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 548 } 549 } 550 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 if (pcbddc->switch_static || pcbddc->dbg_flag) { 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } 556 } 557 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 558 /* Checking coarse_sub_mat and coarse basis functios */ 559 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 560 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 561 if (pcbddc->dbg_flag) { 562 Mat coarse_sub_mat; 563 Mat AUXMAT,TM1,TM2,TM3,TM4; 564 Mat coarse_phi_D,coarse_phi_B; 565 Mat coarse_psi_D,coarse_psi_B; 566 Mat A_II,A_BB,A_IB,A_BI; 567 MatType checkmattype=MATSEQAIJ; 568 PetscReal real_value; 569 570 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 571 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 572 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 573 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 574 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 575 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 576 if (pcbddc->coarse_psi_B) { 577 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 578 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 579 } 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 581 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 584 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 585 if (pcbddc->coarse_psi_B) { 586 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 593 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 594 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 595 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 } else { 599 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 601 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 } 608 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 609 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 610 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 611 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 612 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 613 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 618 for (i=0;i<pcbddc->local_primal_size;i++) { 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 622 for (i=0;i<pcbddc->local_primal_size;i++) { 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 624 } 625 if (pcbddc->coarse_psi_B) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 627 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 629 } 630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 631 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 636 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 637 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 638 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 640 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 641 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 642 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 644 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 645 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 646 if (pcbddc->coarse_psi_B) { 647 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 648 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 649 } 650 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 651 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 652 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 653 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 654 } 655 /* free memory */ 656 if (n_vertices) { 657 ierr = PetscFree(vertices);CHKERRQ(ierr); 658 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 659 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 660 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 661 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 662 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 663 } 664 if (n_constraints) { 665 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 666 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 667 ierr = MatDestroy(&M1);CHKERRQ(ierr); 668 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 669 } 670 ierr = PetscFree(auxindices);CHKERRQ(ierr); 671 /* get back data */ 672 *coarse_submat_vals_n = coarse_submat_vals; 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 678 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 679 { 680 PC_IS* pcis = (PC_IS*)(pc->data); 681 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 682 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 683 /* manage repeated solves */ 684 MatReuse reuse; 685 MatStructure matstruct; 686 PetscErrorCode ierr; 687 688 PetscFunctionBegin; 689 /* get mat flags */ 690 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 691 reuse = MAT_INITIAL_MATRIX; 692 if (pc->setupcalled) { 693 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 694 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 695 if (matstruct == SAME_NONZERO_PATTERN) { 696 reuse = MAT_REUSE_MATRIX; 697 } else { 698 reuse = MAT_INITIAL_MATRIX; 699 } 700 } 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 705 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 707 } 708 709 /* transform local matrices if needed */ 710 if (pcbddc->use_change_of_basis) { 711 Mat change_mat_all; 712 PetscScalar *row_cmat_values; 713 PetscInt *row_cmat_indices; 714 PetscInt *nnz,*is_indices,*temp_indices; 715 PetscInt i,j,k,n_D,n_B; 716 717 /* Get Non-overlapping dimensions */ 718 n_B = pcis->n_B; 719 n_D = pcis->n-n_B; 720 721 /* compute nonzero structure of change of basis on all local nodes */ 722 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 725 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 727 k=1; 728 for (i=0;i<n_B;i++) { 729 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 730 nnz[is_indices[i]]=j; 731 if (k < j) k = j; 732 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 733 } 734 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 735 /* assemble change of basis matrix on the whole set of local dofs */ 736 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 737 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 738 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 739 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 740 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 for (i=0;i<n_D;i++) { 743 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 744 } 745 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 747 for (i=0;i<n_B;i++) { 748 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 749 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 750 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 752 } 753 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 754 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 755 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 756 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 757 if (i==1) { 758 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 759 } else { 760 Mat work_mat; 761 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 762 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 763 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 764 } 765 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 766 ierr = PetscFree(nnz);CHKERRQ(ierr); 767 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 768 } else { 769 /* without change of basis, the local matrix is unchanged */ 770 if (!pcbddc->local_mat) { 771 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 772 pcbddc->local_mat = matis->A; 773 } 774 } 775 776 /* get submatrices */ 777 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 778 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 779 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 780 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 786 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 787 { 788 PC_IS* pcis = (PC_IS*)(pc->data); 789 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 790 IS is_aux1,is_aux2; 791 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 792 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 793 PetscBT bitmask; 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 /* Set Non-overlapping dimensions */ 798 n_B = pcis->n_B; n_D = pcis->n - n_B; 799 /* get vertex indices from constraint matrix */ 800 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 801 /* Set number of constraints */ 802 n_constraints = pcbddc->local_primal_size-n_vertices; 803 /* create auxiliary bitmask */ 804 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 805 for (i=0;i<n_vertices;i++) { 806 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 807 } 808 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 809 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 810 for (i=0, n_R=0; i<pcis->n; i++) { 811 if (!PetscBTLookup(bitmask,i)) { 812 idx_R_local[n_R] = i; 813 n_R++; 814 } 815 } 816 ierr = PetscFree(vertices);CHKERRQ(ierr); 817 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 818 819 /* print some info if requested */ 820 if (pcbddc->dbg_flag) { 821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 827 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 828 } 829 830 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 831 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 832 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 833 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 834 for (i=0; i<n_D; i++) { 835 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 836 } 837 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 838 for (i=0, j=0; i<n_R; i++) { 839 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 840 aux_array1[j++] = i; 841 } 842 } 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 844 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 845 for (i=0, j=0; i<n_B; i++) { 846 if (!PetscBTLookup(bitmask,is_indices[i])) { 847 aux_array2[j++] = i; 848 } 849 } 850 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 851 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 852 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 854 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 855 856 if (pcbddc->switch_static || pcbddc->dbg_flag) { 857 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 858 for (i=0, j=0; i<n_R; i++) { 859 if (PetscBTLookup(bitmask,idx_R_local[i])) { 860 aux_array1[j++] = i; 861 } 862 } 863 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 864 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 866 } 867 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 873 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 874 { 875 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 876 877 PetscFunctionBegin; 878 pcbddc->use_exact_dirichlet=use; 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 884 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 885 { 886 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 887 PC_IS *pcis = (PC_IS*)pc->data; 888 PC pc_temp; 889 Mat A_RR; 890 Vec vec1,vec2,vec3; 891 MatStructure matstruct; 892 PetscScalar m_one = -1.0; 893 PetscReal value; 894 PetscInt n_D,n_R,use_exact,use_exact_reduced; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 /* Creating PC contexts for local Dirichlet and Neumann problems */ 899 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 900 901 /* DIRICHLET PROBLEM */ 902 /* Matrix for Dirichlet problem is pcis->A_II */ 903 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 904 if (!pcbddc->ksp_D) { /* create object if not yet build */ 905 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 906 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 907 /* default */ 908 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 909 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 910 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 911 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 912 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 913 } 914 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 915 /* Allow user's customization */ 916 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 917 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 918 if (!n_D) { 919 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 920 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 921 } 922 /* Set Up KSP for Dirichlet problem of BDDC */ 923 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 924 /* set ksp_D into pcis data */ 925 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 926 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 927 pcis->ksp_D = pcbddc->ksp_D; 928 929 /* NEUMANN PROBLEM */ 930 /* Matrix for Neumann problem is A_RR -> we need to create it */ 931 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 933 if (!pcbddc->ksp_R) { /* create object if not yet build */ 934 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 935 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 936 /* default */ 937 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 938 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 940 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 941 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 942 } 943 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 944 /* Allow user's customization */ 945 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 947 if (!n_R) { 948 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 950 } 951 /* Set Up KSP for Neumann problem of BDDC */ 952 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 953 954 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 955 956 /* Dirichlet */ 957 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 958 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 959 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 960 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 961 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 962 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 963 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 966 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 967 /* need to be adapted? */ 968 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 970 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 971 /* print info */ 972 if (pcbddc->dbg_flag) { 973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 978 } 979 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 980 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 981 } 982 983 /* Neumann */ 984 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 985 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 986 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 987 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 988 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 989 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 990 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 991 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 992 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 993 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 994 /* need to be adapted? */ 995 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 996 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 997 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 998 /* print info */ 999 if (pcbddc->dbg_flag) { 1000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1002 } 1003 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1005 } 1006 1007 /* free Neumann problem's matrix */ 1008 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1014 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1015 { 1016 PetscErrorCode ierr; 1017 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1018 1019 PetscFunctionBegin; 1020 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1021 if (pcbddc->local_auxmat1) { 1022 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1023 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1024 } 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1030 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1031 { 1032 PetscErrorCode ierr; 1033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1034 PC_IS* pcis = (PC_IS*) (pc->data); 1035 const PetscScalar zero = 0.0; 1036 1037 PetscFunctionBegin; 1038 /* Application of PHI^T (or PSI^T) */ 1039 if (pcbddc->coarse_psi_B) { 1040 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1041 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1042 } else { 1043 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1044 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1045 } 1046 /* Scatter data of coarse_rhs */ 1047 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1048 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 1050 /* Local solution on R nodes */ 1051 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1054 if (pcbddc->switch_static) { 1055 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1057 } 1058 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1059 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 if (pcbddc->switch_static) { 1063 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065 } 1066 1067 /* Coarse solution */ 1068 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1070 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1071 } 1072 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1074 1075 /* Sum contributions from two levels */ 1076 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1077 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1083 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1084 { 1085 PetscErrorCode ierr; 1086 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1087 1088 PetscFunctionBegin; 1089 switch (pcbddc->coarse_communications_type) { 1090 case SCATTERS_BDDC: 1091 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1092 break; 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1099 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1100 { 1101 PetscErrorCode ierr; 1102 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1103 1104 PetscFunctionBegin; 1105 switch (pcbddc->coarse_communications_type) { 1106 case SCATTERS_BDDC: 1107 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1108 break; 1109 } 1110 PetscFunctionReturn(0); 1111 } 1112 1113 /* uncomment for testing purposes */ 1114 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1117 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1118 { 1119 PetscErrorCode ierr; 1120 PC_IS* pcis = (PC_IS*)(pc->data); 1121 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1122 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1123 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1124 MatType impMatType=MATSEQAIJ; 1125 /* one and zero */ 1126 PetscScalar one=1.0,zero=0.0; 1127 /* space to store constraints and their local indices */ 1128 PetscScalar *temp_quadrature_constraint; 1129 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1130 /* iterators */ 1131 PetscInt i,j,k,total_counts,temp_start_ptr; 1132 /* stuff to store connected components stored in pcbddc->mat_graph */ 1133 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1134 PetscInt n_ISForFaces,n_ISForEdges; 1135 /* near null space stuff */ 1136 MatNullSpace nearnullsp; 1137 const Vec *nearnullvecs; 1138 Vec *localnearnullsp; 1139 PetscBool nnsp_has_cnst; 1140 PetscInt nnsp_size; 1141 PetscScalar *array; 1142 /* BLAS integers */ 1143 PetscBLASInt lwork,lierr; 1144 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1145 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1146 /* LAPACK working arrays for SVD or POD */ 1147 PetscBool skip_lapack; 1148 PetscScalar *work; 1149 PetscReal *singular_vals; 1150 #if defined(PETSC_USE_COMPLEX) 1151 PetscReal *rwork; 1152 #endif 1153 #if defined(PETSC_MISSING_LAPACK_GESVD) 1154 PetscBLASInt Blas_one_2=1; 1155 PetscScalar *temp_basis,*correlation_mat; 1156 #else 1157 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1158 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1159 #endif 1160 /* change of basis */ 1161 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1162 PetscBool boolforchange; 1163 PetscBT touched,change_basis; 1164 /* auxiliary stuff */ 1165 PetscInt *nnz,*is_indices,*local_to_B; 1166 /* some quantities */ 1167 PetscInt n_vertices,total_primal_vertices; 1168 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1169 1170 1171 PetscFunctionBegin; 1172 /* Get index sets for faces, edges and vertices from graph */ 1173 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1174 pcbddc->use_vertices = PETSC_TRUE; 1175 } 1176 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1177 /* print some info */ 1178 if (pcbddc->dbg_flag) { 1179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1180 i = 0; 1181 if (ISForVertices) { 1182 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 /* check if near null space is attached to global mat */ 1190 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1191 if (nearnullsp) { 1192 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1193 } else { /* if near null space is not provided BDDC uses constants by default */ 1194 nnsp_size = 0; 1195 nnsp_has_cnst = PETSC_TRUE; 1196 } 1197 /* get max number of constraints on a single cc */ 1198 max_constraints = nnsp_size; 1199 if (nnsp_has_cnst) max_constraints++; 1200 1201 /* 1202 Evaluate maximum storage size needed by the procedure 1203 - temp_indices will contain start index of each constraint stored as follows 1204 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1205 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1206 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1207 */ 1208 total_counts = n_ISForFaces+n_ISForEdges; 1209 total_counts *= max_constraints; 1210 n_vertices = 0; 1211 if (ISForVertices) { 1212 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1213 } 1214 total_counts += n_vertices; 1215 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1216 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1217 total_counts = 0; 1218 max_size_of_constraint = 0; 1219 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1220 if (i<n_ISForEdges) { 1221 used_IS = &ISForEdges[i]; 1222 } else { 1223 used_IS = &ISForFaces[i-n_ISForEdges]; 1224 } 1225 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1226 total_counts += j; 1227 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1228 } 1229 total_counts *= max_constraints; 1230 total_counts += n_vertices; 1231 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1232 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1233 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1234 /* local to boundary numbering */ 1235 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1236 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1237 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1238 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1239 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1240 /* get local part of global near null space vectors */ 1241 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1242 for (k=0;k<nnsp_size;k++) { 1243 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 } 1247 1248 /* whether or not to skip lapack calls */ 1249 skip_lapack = PETSC_TRUE; 1250 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1251 1252 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1253 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1254 PetscScalar temp_work; 1255 #if defined(PETSC_MISSING_LAPACK_GESVD) 1256 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1257 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1258 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1259 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1260 #if defined(PETSC_USE_COMPLEX) 1261 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1262 #endif 1263 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1264 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1265 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1266 lwork = -1; 1267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1268 #if !defined(PETSC_USE_COMPLEX) 1269 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1270 #else 1271 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1272 #endif 1273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1274 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1275 #else /* on missing GESVD */ 1276 /* SVD */ 1277 PetscInt max_n,min_n; 1278 max_n = max_size_of_constraint; 1279 min_n = max_constraints; 1280 if (max_size_of_constraint < max_constraints) { 1281 min_n = max_size_of_constraint; 1282 max_n = max_constraints; 1283 } 1284 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1285 #if defined(PETSC_USE_COMPLEX) 1286 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1287 #endif 1288 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1289 lwork = -1; 1290 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1291 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1292 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1293 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1294 #if !defined(PETSC_USE_COMPLEX) 1295 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1296 #else 1297 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1298 #endif 1299 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1300 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1301 #endif /* on missing GESVD */ 1302 /* Allocate optimal workspace */ 1303 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1304 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1305 } 1306 /* Now we can loop on constraining sets */ 1307 total_counts = 0; 1308 temp_indices[0] = 0; 1309 /* vertices */ 1310 if (ISForVertices) { 1311 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1312 if (nnsp_has_cnst) { /* consider all vertices */ 1313 for (i=0;i<n_vertices;i++) { 1314 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1315 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1316 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1317 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1318 total_counts++; 1319 } 1320 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1321 PetscBool used_vertex; 1322 for (i=0;i<n_vertices;i++) { 1323 used_vertex = PETSC_FALSE; 1324 k = 0; 1325 while (!used_vertex && k<nnsp_size) { 1326 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1327 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1328 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1329 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1330 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1331 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1332 total_counts++; 1333 used_vertex = PETSC_TRUE; 1334 } 1335 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1336 k++; 1337 } 1338 } 1339 } 1340 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 n_vertices = total_counts; 1342 } 1343 1344 /* edges and faces */ 1345 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1346 if (i<n_ISForEdges) { 1347 used_IS = &ISForEdges[i]; 1348 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1349 } else { 1350 used_IS = &ISForFaces[i-n_ISForEdges]; 1351 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1352 } 1353 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1354 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1355 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1356 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1357 /* change of basis should not be performed on local periodic nodes */ 1358 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1359 if (nnsp_has_cnst) { 1360 PetscScalar quad_value; 1361 temp_constraints++; 1362 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1363 for (j=0;j<size_of_constraint;j++) { 1364 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1365 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1366 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1367 } 1368 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1369 if (boolforchange) { 1370 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1371 } 1372 total_counts++; 1373 } 1374 for (k=0;k<nnsp_size;k++) { 1375 PetscReal real_value; 1376 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1377 for (j=0;j<size_of_constraint;j++) { 1378 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1379 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1380 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1381 } 1382 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1383 /* check if array is null on the connected component */ 1384 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1385 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1386 if (real_value > 0.0) { /* keep indices and values */ 1387 temp_constraints++; 1388 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1389 if (boolforchange) { 1390 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1391 } 1392 total_counts++; 1393 } 1394 } 1395 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1396 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1397 if (!pcbddc->use_nnsp_true) { 1398 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1399 1400 #if defined(PETSC_MISSING_LAPACK_GESVD) 1401 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1402 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1403 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1404 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1405 from that computed using LAPACKgesvd 1406 -> This is due to a different computation of eigenvectors in LAPACKheev 1407 -> The quality of the POD-computed basis will be the same */ 1408 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1409 /* Store upper triangular part of correlation matrix */ 1410 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1411 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1412 for (j=0;j<temp_constraints;j++) { 1413 for (k=0;k<j+1;k++) { 1414 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1415 } 1416 } 1417 /* compute eigenvalues and eigenvectors of correlation matrix */ 1418 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1419 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1420 #if !defined(PETSC_USE_COMPLEX) 1421 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1422 #else 1423 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1424 #endif 1425 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1426 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1427 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1428 j=0; 1429 while (j < temp_constraints && singular_vals[j] < tol) j++; 1430 total_counts=total_counts-j; 1431 /* scale and copy POD basis into used quadrature memory */ 1432 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1433 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1434 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1435 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1436 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1438 if (j<temp_constraints) { 1439 PetscInt ii; 1440 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1441 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1442 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1443 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1444 for (k=0;k<temp_constraints-j;k++) { 1445 for (ii=0;ii<size_of_constraint;ii++) { 1446 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1447 } 1448 } 1449 } 1450 #else /* on missing GESVD */ 1451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1454 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1455 #if !defined(PETSC_USE_COMPLEX) 1456 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1457 #else 1458 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1459 #endif 1460 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1461 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1462 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1463 k = temp_constraints; 1464 if (k > size_of_constraint) k = size_of_constraint; 1465 j = 0; 1466 while (j < k && singular_vals[k-j-1] < tol) j++; 1467 total_counts = total_counts-temp_constraints+k-j; 1468 #endif /* on missing GESVD */ 1469 } 1470 } 1471 /* free index sets of faces, edges and vertices */ 1472 for (i=0;i<n_ISForFaces;i++) { 1473 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1474 } 1475 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1476 for (i=0;i<n_ISForEdges;i++) { 1477 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1478 } 1479 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1480 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1481 1482 /* free workspace */ 1483 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1484 ierr = PetscFree(work);CHKERRQ(ierr); 1485 #if defined(PETSC_USE_COMPLEX) 1486 ierr = PetscFree(rwork);CHKERRQ(ierr); 1487 #endif 1488 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1489 #if defined(PETSC_MISSING_LAPACK_GESVD) 1490 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1491 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1492 #endif 1493 } 1494 for (k=0;k<nnsp_size;k++) { 1495 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1496 } 1497 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1498 1499 /* set quantities in pcbddc data structure */ 1500 /* n_vertices defines the number of subdomain corners in the primal space */ 1501 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1502 pcbddc->local_primal_size = total_counts; 1503 pcbddc->n_vertices = n_vertices; 1504 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1505 1506 /* Create constraint matrix */ 1507 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1508 /* so we need to set it up properly either with or without change of basis */ 1509 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1510 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1511 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1512 /* array to compute a local numbering of constraints : vertices first then constraints */ 1513 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1514 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1515 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1516 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1517 /* auxiliary stuff for basis change */ 1518 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1519 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1520 1521 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1522 total_primal_vertices=0; 1523 for (i=0;i<pcbddc->local_primal_size;i++) { 1524 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1525 if (size_of_constraint == 1) { 1526 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1527 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1528 aux_primal_minloc[total_primal_vertices]=0; 1529 total_primal_vertices++; 1530 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1531 PetscInt min_loc,min_index; 1532 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1533 /* find first untouched local node */ 1534 k = 0; 1535 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1536 min_index = global_indices[k]; 1537 min_loc = k; 1538 /* search the minimum among global nodes already untouched on the cc */ 1539 for (k=1;k<size_of_constraint;k++) { 1540 /* there can be more than one constraint on a single connected component */ 1541 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1542 min_index = global_indices[k]; 1543 min_loc = k; 1544 } 1545 } 1546 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1547 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1548 aux_primal_minloc[total_primal_vertices]=min_loc; 1549 total_primal_vertices++; 1550 } 1551 } 1552 /* free workspace */ 1553 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1554 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1555 /* permute indices in order to have a sorted set of vertices */ 1556 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1557 1558 /* nonzero structure of constraint matrix */ 1559 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1560 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1561 j=total_primal_vertices; 1562 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1563 if (!PetscBTLookup(change_basis,i)) { 1564 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1565 j++; 1566 } 1567 } 1568 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1569 ierr = PetscFree(nnz);CHKERRQ(ierr); 1570 /* set values in constraint matrix */ 1571 for (i=0;i<total_primal_vertices;i++) { 1572 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1573 } 1574 total_counts = total_primal_vertices; 1575 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1576 if (!PetscBTLookup(change_basis,i)) { 1577 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1578 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1579 total_counts++; 1580 } 1581 } 1582 /* assembling */ 1583 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1585 /* 1586 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1587 */ 1588 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1589 if (pcbddc->use_change_of_basis) { 1590 PetscBool qr_needed = PETSC_FALSE; 1591 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1592 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1593 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1594 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1595 /* work arrays */ 1596 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1597 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1598 /* nonzeros per row */ 1599 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1600 if (PetscBTLookup(change_basis,i)) { 1601 qr_needed = PETSC_TRUE; 1602 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1603 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1604 } 1605 } 1606 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1607 ierr = PetscFree(nnz);CHKERRQ(ierr); 1608 /* Set initial identity in the matrix */ 1609 for (i=0;i<pcis->n_B;i++) { 1610 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1611 } 1612 1613 /* Now we loop on the constraints which need a change of basis */ 1614 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1615 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1616 /* Change of basis matrix T computed via QR decomposition of constraints */ 1617 if (qr_needed) { 1618 /* dual and primal dofs on a single cc */ 1619 PetscInt dual_dofs,primal_dofs; 1620 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1621 PetscInt primal_counter; 1622 /* working stuff for GEQRF */ 1623 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1624 PetscBLASInt lqr_work; 1625 /* working stuff for UNGQR */ 1626 PetscScalar *gqr_work,lgqr_work_t; 1627 PetscBLASInt lgqr_work; 1628 /* working stuff for TRTRS */ 1629 PetscScalar *trs_rhs; 1630 PetscBLASInt Blas_NRHS; 1631 /* pointers for values insertion into change of basis matrix */ 1632 PetscInt *start_rows,*start_cols; 1633 PetscScalar *start_vals; 1634 /* working stuff for values insertion */ 1635 PetscBT is_primal; 1636 1637 /* space to store Q */ 1638 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1639 /* first we issue queries for optimal work */ 1640 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1641 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1642 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1643 lqr_work = -1; 1644 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1645 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1646 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1647 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1648 lgqr_work = -1; 1649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1650 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1651 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1652 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1653 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1654 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1655 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1657 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1658 /* array to store scaling factors for reflectors */ 1659 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1660 /* array to store rhs and solution of triangular solver */ 1661 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1662 /* array to store whether a node is primal or not */ 1663 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1664 for (i=0;i<total_primal_vertices;i++) { 1665 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1666 } 1667 1668 /* allocating workspace for check */ 1669 if (pcbddc->dbg_flag) { 1670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1672 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1673 } 1674 1675 /* loop on constraints and see whether or not they need a change of basis */ 1676 /* -> using implicit ordering contained in temp_indices data */ 1677 total_counts = pcbddc->n_vertices; 1678 primal_counter = total_counts; 1679 while (total_counts<pcbddc->local_primal_size) { 1680 primal_dofs = 1; 1681 if (PetscBTLookup(change_basis,total_counts)) { 1682 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1683 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1684 primal_dofs++; 1685 } 1686 /* get constraint info */ 1687 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1688 dual_dofs = size_of_constraint-primal_dofs; 1689 1690 /* copy quadrature constraints for change of basis check */ 1691 if (pcbddc->dbg_flag) { 1692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1693 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1694 } 1695 1696 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1697 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1698 1699 /* compute QR decomposition of constraints */ 1700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1701 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1703 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1704 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1705 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1706 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1707 1708 /* explictly compute R^-T */ 1709 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1710 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1711 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1714 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1715 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1716 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1717 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1719 1720 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1726 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1727 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1728 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1729 1730 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1731 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1732 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1733 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1734 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1735 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1737 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1739 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1740 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1741 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1742 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1743 1744 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1745 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1746 /* insert cols for primal dofs */ 1747 for (j=0;j<primal_dofs;j++) { 1748 start_vals = &qr_basis[j*size_of_constraint]; 1749 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1750 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1751 } 1752 /* insert cols for dual dofs */ 1753 for (j=0,k=0;j<dual_dofs;k++) { 1754 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1755 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1756 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1757 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1758 j++; 1759 } 1760 } 1761 1762 /* check change of basis */ 1763 if (pcbddc->dbg_flag) { 1764 PetscInt ii,jj; 1765 PetscBool valid_qr=PETSC_TRUE; 1766 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1767 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1768 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1769 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1771 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1772 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1773 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1774 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1775 for (jj=0;jj<size_of_constraint;jj++) { 1776 for (ii=0;ii<primal_dofs;ii++) { 1777 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1778 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1779 } 1780 } 1781 if (!valid_qr) { 1782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1783 for (jj=0;jj<size_of_constraint;jj++) { 1784 for (ii=0;ii<primal_dofs;ii++) { 1785 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1786 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1787 } 1788 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1789 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1790 } 1791 } 1792 } 1793 } else { 1794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1795 } 1796 } 1797 /* increment primal counter */ 1798 primal_counter += primal_dofs; 1799 } else { 1800 if (pcbddc->dbg_flag) { 1801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1802 } 1803 } 1804 /* increment constraint counter total_counts */ 1805 total_counts += primal_dofs; 1806 } 1807 if (pcbddc->dbg_flag) { 1808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1809 ierr = PetscFree(work);CHKERRQ(ierr); 1810 } 1811 /* free workspace */ 1812 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1813 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1814 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1815 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1816 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1817 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1818 } 1819 /* assembling */ 1820 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 /* 1823 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1824 */ 1825 } 1826 /* free workspace */ 1827 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1828 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1829 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1830 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1831 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1832 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1833 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1834 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1840 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1841 { 1842 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1843 PC_IS *pcis = (PC_IS*)pc->data; 1844 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1845 PetscInt bs,ierr,i,vertex_size; 1846 PetscViewer viewer=pcbddc->dbg_viewer; 1847 1848 PetscFunctionBegin; 1849 /* Init local Graph struct */ 1850 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1851 1852 /* Check validity of the csr graph passed in by the user */ 1853 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1854 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1855 } 1856 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1857 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1858 Mat mat_adj; 1859 const PetscInt *xadj,*adjncy; 1860 PetscBool flg_row=PETSC_TRUE; 1861 1862 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1863 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1864 if (!flg_row) { 1865 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1866 } 1867 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1868 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1869 if (!flg_row) { 1870 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1871 } 1872 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1873 } 1874 1875 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1876 vertex_size = 1; 1877 if (!pcbddc->n_ISForDofs) { 1878 IS *custom_ISForDofs; 1879 1880 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1881 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1882 for (i=0;i<bs;i++) { 1883 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1884 } 1885 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1886 /* remove my references to IS objects */ 1887 for (i=0;i<bs;i++) { 1888 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1891 } else { /* mat block size as vertex size (used for elasticity) */ 1892 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1893 } 1894 1895 /* Setup of Graph */ 1896 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1897 1898 /* Graph's connected components analysis */ 1899 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1900 1901 /* print some info to stdout */ 1902 if (pcbddc->dbg_flag) { 1903 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1910 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1911 { 1912 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1913 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 n = 0; 1918 vertices = 0; 1919 if (pcbddc->ConstraintMatrix) { 1920 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1921 for (i=0;i<local_primal_size;i++) { 1922 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1923 if (size_of_constraint == 1) n++; 1924 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1925 } 1926 if (vertices_idx) { 1927 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1928 n = 0; 1929 for (i=0;i<local_primal_size;i++) { 1930 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1931 if (size_of_constraint == 1) { 1932 vertices[n++]=row_cmat_indices[0]; 1933 } 1934 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1935 } 1936 } 1937 } 1938 *n_vertices = n; 1939 if (vertices_idx) *vertices_idx = vertices; 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1945 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 1946 { 1947 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1948 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1949 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1950 PetscBT touched; 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 n = 0; 1955 constraints_index = 0; 1956 if (pcbddc->ConstraintMatrix) { 1957 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1958 max_size_of_constraint = 0; 1959 for (i=0;i<local_primal_size;i++) { 1960 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1961 if (size_of_constraint > 1) { 1962 n++; 1963 } 1964 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1965 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1966 } 1967 if (constraints_idx) { 1968 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1969 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1970 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1971 n = 0; 1972 for (i=0;i<local_primal_size;i++) { 1973 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1974 if (size_of_constraint > 1) { 1975 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1976 /* find first untouched local node */ 1977 j = 0; 1978 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1979 min_index = row_cmat_global_indices[j]; 1980 min_loc = j; 1981 /* search the minimum among nodes not yet touched on the connected component 1982 since there can be more than one constraint on a single cc */ 1983 for (j=1;j<size_of_constraint;j++) { 1984 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1985 min_index = row_cmat_global_indices[j]; 1986 min_loc = j; 1987 } 1988 } 1989 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1990 constraints_index[n++] = row_cmat_indices[min_loc]; 1991 } 1992 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1993 } 1994 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1995 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1996 } 1997 } 1998 *n_constraints = n; 1999 if (constraints_idx) *constraints_idx = constraints_index; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 /* the next two functions has been adapted from pcis.c */ 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "PCBDDCApplySchur" 2006 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2007 { 2008 PetscErrorCode ierr; 2009 PC_IS *pcis = (PC_IS*)(pc->data); 2010 2011 PetscFunctionBegin; 2012 if (!vec2_B) { vec2_B = v; } 2013 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2014 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2015 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2016 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2017 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2023 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2024 { 2025 PetscErrorCode ierr; 2026 PC_IS *pcis = (PC_IS*)(pc->data); 2027 2028 PetscFunctionBegin; 2029 if (!vec2_B) { vec2_B = v; } 2030 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2031 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2032 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2033 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2034 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "PCBDDCSubsetNumbering" 2040 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2041 { 2042 Vec local_vec,global_vec; 2043 IS seqis,paris; 2044 VecScatter scatter_ctx; 2045 PetscScalar *array; 2046 PetscInt *temp_global_dofs; 2047 PetscScalar globalsum; 2048 PetscInt i,j,s; 2049 PetscInt nlocals,first_index,old_index,max_local; 2050 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2051 PetscMPIInt *dof_sizes,*dof_displs; 2052 PetscBool first_found; 2053 PetscErrorCode ierr; 2054 2055 PetscFunctionBegin; 2056 /* mpi buffers */ 2057 MPI_Comm_size(comm,&size_prec_comm); 2058 MPI_Comm_rank(comm,&rank_prec_comm); 2059 j = ( !rank_prec_comm ? size_prec_comm : 0); 2060 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2061 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2062 /* get maximum size of subset */ 2063 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2064 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2065 max_local = 0; 2066 if (n_local_dofs) { 2067 max_local = temp_global_dofs[0]; 2068 for (i=1;i<n_local_dofs;i++) { 2069 if (max_local < temp_global_dofs[i] ) { 2070 max_local = temp_global_dofs[i]; 2071 } 2072 } 2073 } 2074 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2075 max_global++; 2076 max_local = 0; 2077 if (n_local_dofs) { 2078 max_local = local_dofs[0]; 2079 for (i=1;i<n_local_dofs;i++) { 2080 if (max_local < local_dofs[i] ) { 2081 max_local = local_dofs[i]; 2082 } 2083 } 2084 } 2085 max_local++; 2086 /* allocate workspace */ 2087 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2088 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2089 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2090 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2091 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2092 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2093 /* create scatter */ 2094 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2095 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2096 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2097 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2098 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2099 /* init array */ 2100 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2101 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2102 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2103 if (local_dofs_mult) { 2104 for (i=0;i<n_local_dofs;i++) { 2105 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2106 } 2107 } else { 2108 for (i=0;i<n_local_dofs;i++) { 2109 array[local_dofs[i]]=1.0; 2110 } 2111 } 2112 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2113 /* scatter into global vec and get total number of global dofs */ 2114 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2117 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2118 /* Fill global_vec with cumulative function for global numbering */ 2119 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2120 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2121 nlocals = 0; 2122 first_index = -1; 2123 first_found = PETSC_FALSE; 2124 for (i=0;i<s;i++) { 2125 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2126 first_found = PETSC_TRUE; 2127 first_index = i; 2128 } 2129 nlocals += (PetscInt)PetscRealPart(array[i]); 2130 } 2131 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2132 if (!rank_prec_comm) { 2133 dof_displs[0]=0; 2134 for (i=1;i<size_prec_comm;i++) { 2135 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2136 } 2137 } 2138 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2139 if (first_found) { 2140 array[first_index] += (PetscScalar)nlocals; 2141 old_index = first_index; 2142 for (i=first_index+1;i<s;i++) { 2143 if (PetscRealPart(array[i]) > 0.0) { 2144 array[i] += array[old_index]; 2145 old_index = i; 2146 } 2147 } 2148 } 2149 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2150 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2151 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2152 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2153 /* get global ordering of local dofs */ 2154 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2155 if (local_dofs_mult) { 2156 for (i=0;i<n_local_dofs;i++) { 2157 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2158 } 2159 } else { 2160 for (i=0;i<n_local_dofs;i++) { 2161 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2162 } 2163 } 2164 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2165 /* free workspace */ 2166 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2167 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2168 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2169 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2170 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2171 /* return pointer to global ordering of local dofs */ 2172 *global_numbering_subset = temp_global_dofs; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2178 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2179 { 2180 PetscInt i,j; 2181 PetscScalar *alphas; 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 /* this implements stabilized Gram-Schmidt */ 2186 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2187 for (i=0;i<n;i++) { 2188 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2189 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2190 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2191 } 2192 ierr = PetscFree(alphas);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 /* BDDC requires metis 5.0.1 for multilevel */ 2197 #if defined(PETSC_HAVE_METIS) 2198 #include "metis.h" 2199 #define MetisInt idx_t 2200 #define MetisScalar real_t 2201 #endif 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2205 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2206 { 2207 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2208 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2209 PC_IS *pcis = (PC_IS*)pc->data; 2210 MPI_Comm prec_comm; 2211 MPI_Comm coarse_comm; 2212 2213 PetscInt coarse_size; 2214 2215 MatNullSpace CoarseNullSpace; 2216 2217 /* common to all choiches */ 2218 PetscScalar *temp_coarse_mat_vals; 2219 PetscScalar *ins_coarse_mat_vals; 2220 PetscInt *ins_local_primal_indices; 2221 PetscMPIInt *localsizes2,*localdispl2; 2222 PetscMPIInt size_prec_comm; 2223 PetscMPIInt rank_prec_comm; 2224 PetscMPIInt active_rank=MPI_PROC_NULL; 2225 PetscMPIInt master_proc=0; 2226 PetscInt ins_local_primal_size; 2227 /* specific to MULTILEVEL_BDDC */ 2228 PetscMPIInt *ranks_recv=0; 2229 PetscMPIInt count_recv=0; 2230 PetscMPIInt rank_coarse_proc_send_to=-1; 2231 PetscMPIInt coarse_color = MPI_UNDEFINED; 2232 ISLocalToGlobalMapping coarse_ISLG; 2233 /* some other variables */ 2234 PetscErrorCode ierr; 2235 MatType coarse_mat_type; 2236 PCType coarse_pc_type; 2237 KSPType coarse_ksp_type; 2238 PC pc_temp; 2239 PetscInt i,j,k; 2240 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2241 /* verbose output viewer */ 2242 PetscViewer viewer=pcbddc->dbg_viewer; 2243 PetscInt dbg_flag=pcbddc->dbg_flag; 2244 2245 PetscInt offset,offset2; 2246 PetscMPIInt im_active,active_procs; 2247 PetscInt *dnz,*onz; 2248 2249 PetscBool setsym,issym=PETSC_FALSE; 2250 2251 PetscFunctionBegin; 2252 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2253 ins_local_primal_indices = 0; 2254 ins_coarse_mat_vals = 0; 2255 localsizes2 = 0; 2256 localdispl2 = 0; 2257 temp_coarse_mat_vals = 0; 2258 coarse_ISLG = 0; 2259 2260 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2261 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2262 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2263 2264 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 2265 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2266 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2267 } 2268 2269 /* Assign global numbering to coarse dofs */ 2270 { 2271 PetscInt *auxlocal_primal,*aux_idx; 2272 PetscMPIInt mpi_local_primal_size; 2273 PetscScalar coarsesum,*array; 2274 2275 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2276 2277 /* Construct needed data structures for message passing */ 2278 j = 0; 2279 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2280 j = size_prec_comm; 2281 } 2282 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2283 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2284 /* Gather local_primal_size information for all processes */ 2285 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2286 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2287 } else { 2288 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2289 } 2290 pcbddc->replicated_primal_size = 0; 2291 for (i=0; i<j; i++) { 2292 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2293 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2294 } 2295 2296 /* This code fragment assumes that the number of local constraints per connected component 2297 is not greater than the number of nodes defined for the connected component 2298 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2299 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2300 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2301 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2302 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2303 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2304 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2305 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2306 /* Compute number of coarse dofs */ 2307 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2308 2309 if (dbg_flag) { 2310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2311 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2312 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2313 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2314 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2315 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2316 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2317 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2318 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2319 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2321 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2322 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2323 for (i=0;i<pcis->n;i++) { 2324 if (array[i] == 1.0) { 2325 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2326 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2327 } 2328 } 2329 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2330 for (i=0;i<pcis->n;i++) { 2331 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2332 } 2333 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2334 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2335 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2336 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2337 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2338 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2339 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2340 } 2341 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2342 } 2343 2344 if (dbg_flag) { 2345 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2346 if (dbg_flag > 1) { 2347 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2350 for (i=0;i<pcbddc->local_primal_size;i++) { 2351 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2352 } 2353 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2354 } 2355 } 2356 2357 im_active = 0; 2358 if (pcis->n) im_active = 1; 2359 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2360 2361 /* adapt coarse problem type */ 2362 #if defined(PETSC_HAVE_METIS) 2363 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2364 if (pcbddc->current_level < pcbddc->max_levels) { 2365 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2366 if (dbg_flag) { 2367 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2368 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2369 } 2370 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2371 } 2372 } else { 2373 if (dbg_flag) { 2374 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2375 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2376 } 2377 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2378 } 2379 } 2380 #else 2381 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2382 #endif 2383 2384 switch(pcbddc->coarse_problem_type){ 2385 2386 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2387 { 2388 #if defined(PETSC_HAVE_METIS) 2389 /* we need additional variables */ 2390 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2391 MetisInt *metis_coarse_subdivision; 2392 MetisInt options[METIS_NOPTIONS]; 2393 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2394 PetscMPIInt procs_jumps_coarse_comm; 2395 PetscMPIInt *coarse_subdivision; 2396 PetscMPIInt *total_count_recv; 2397 PetscMPIInt *total_ranks_recv; 2398 PetscMPIInt *displacements_recv; 2399 PetscMPIInt *my_faces_connectivity; 2400 PetscMPIInt *petsc_faces_adjncy; 2401 MetisInt *faces_adjncy; 2402 MetisInt *faces_xadj; 2403 PetscMPIInt *number_of_faces; 2404 PetscMPIInt *faces_displacements; 2405 PetscInt *array_int; 2406 PetscMPIInt my_faces=0; 2407 PetscMPIInt total_faces=0; 2408 PetscInt ranks_stretching_ratio; 2409 2410 /* define some quantities */ 2411 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2412 coarse_mat_type = MATIS; 2413 coarse_pc_type = PCBDDC; 2414 coarse_ksp_type = KSPRICHARDSON; 2415 2416 /* details of coarse decomposition */ 2417 n_subdomains = active_procs; 2418 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2419 ranks_stretching_ratio = size_prec_comm/active_procs; 2420 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2421 2422 #if 0 2423 PetscMPIInt *old_ranks; 2424 PetscInt *new_ranks,*jj,*ii; 2425 MatPartitioning mat_part; 2426 IS coarse_new_decomposition,is_numbering; 2427 PetscViewer viewer_test; 2428 MPI_Comm test_coarse_comm; 2429 PetscMPIInt test_coarse_color; 2430 Mat mat_adj; 2431 /* Create new communicator for coarse problem splitting the old one */ 2432 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2433 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2434 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2435 test_coarse_comm = MPI_COMM_NULL; 2436 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2437 if (im_active) { 2438 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2439 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2440 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2441 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2442 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2443 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2444 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2445 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2446 k = pcis->n_neigh-1; 2447 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2448 ii[0]=0; 2449 ii[1]=k; 2450 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2451 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2452 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2453 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2454 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2455 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2456 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2457 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2458 printf("Setting Nparts %d\n",n_parts); 2459 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2460 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2461 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2462 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2463 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2464 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2465 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2466 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2467 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2468 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2469 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2470 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2471 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2472 } 2473 #endif 2474 2475 /* build CSR graph of subdomains' connectivity */ 2476 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2477 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2478 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2479 for (j=0;j<pcis->n_shared[i];j++){ 2480 array_int[ pcis->shared[i][j] ]+=1; 2481 } 2482 } 2483 for (i=1;i<pcis->n_neigh;i++){ 2484 for (j=0;j<pcis->n_shared[i];j++){ 2485 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2486 my_faces++; 2487 break; 2488 } 2489 } 2490 } 2491 2492 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2493 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2494 my_faces=0; 2495 for (i=1;i<pcis->n_neigh;i++){ 2496 for (j=0;j<pcis->n_shared[i];j++){ 2497 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2498 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2499 my_faces++; 2500 break; 2501 } 2502 } 2503 } 2504 if (rank_prec_comm == master_proc) { 2505 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2506 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2507 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2508 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2509 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2510 } 2511 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2512 if (rank_prec_comm == master_proc) { 2513 faces_xadj[0]=0; 2514 faces_displacements[0]=0; 2515 j=0; 2516 for (i=1;i<size_prec_comm+1;i++) { 2517 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2518 if (number_of_faces[i-1]) { 2519 j++; 2520 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2521 } 2522 } 2523 } 2524 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2525 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2526 ierr = PetscFree(array_int);CHKERRQ(ierr); 2527 if (rank_prec_comm == master_proc) { 2528 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2529 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2530 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2531 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2532 } 2533 2534 if ( rank_prec_comm == master_proc ) { 2535 2536 PetscInt heuristic_for_metis=3; 2537 2538 ncon=1; 2539 faces_nvtxs=n_subdomains; 2540 /* partition graoh induced by face connectivity */ 2541 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2542 ierr = METIS_SetDefaultOptions(options); 2543 /* we need a contiguous partition of the coarse mesh */ 2544 options[METIS_OPTION_CONTIG]=1; 2545 options[METIS_OPTION_NITER]=30; 2546 if (pcbddc->coarsening_ratio > 1) { 2547 if (n_subdomains>n_parts*heuristic_for_metis) { 2548 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2549 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2550 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2551 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2552 } else { 2553 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2554 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2555 } 2556 } else { 2557 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2558 } 2559 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2560 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2561 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2562 2563 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2564 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2565 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2566 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2567 } 2568 2569 /* Create new communicator for coarse problem splitting the old one */ 2570 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2571 coarse_color=0; /* for communicator splitting */ 2572 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2573 } 2574 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2575 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2576 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2577 2578 if ( coarse_color == 0 ) { 2579 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2580 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2581 } else { 2582 rank_coarse_comm = MPI_PROC_NULL; 2583 } 2584 2585 /* master proc take care of arranging and distributing coarse information */ 2586 if (rank_coarse_comm == master_proc) { 2587 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2588 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2589 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2590 /* some initializations */ 2591 displacements_recv[0]=0; 2592 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2593 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2594 for (j=0;j<size_coarse_comm;j++) { 2595 for (i=0;i<size_prec_comm;i++) { 2596 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2597 } 2598 } 2599 /* displacements needed for scatterv of total_ranks_recv */ 2600 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2601 2602 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2603 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2604 for (j=0;j<size_coarse_comm;j++) { 2605 for (i=0;i<size_prec_comm;i++) { 2606 if (coarse_subdivision[i]==j) { 2607 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2608 total_count_recv[j]+=1; 2609 } 2610 } 2611 } 2612 /*for (j=0;j<size_coarse_comm;j++) { 2613 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2614 for (i=0;i<total_count_recv[j];i++) { 2615 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2616 } 2617 printf("\n"); 2618 }*/ 2619 2620 /* identify new decomposition in terms of ranks in the old communicator */ 2621 for (i=0;i<n_subdomains;i++) { 2622 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2623 } 2624 /*printf("coarse_subdivision in old end new ranks\n"); 2625 for (i=0;i<size_prec_comm;i++) 2626 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2627 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2628 } else { 2629 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2630 } 2631 printf("\n");*/ 2632 } 2633 2634 /* Scatter new decomposition for send details */ 2635 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2636 /* Scatter receiving details to members of coarse decomposition */ 2637 if ( coarse_color == 0) { 2638 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2639 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2640 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2641 } 2642 2643 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2644 if (coarse_color == 0) { 2645 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2646 for (i=0;i<count_recv;i++) 2647 printf("%d ",ranks_recv[i]); 2648 printf("\n"); 2649 }*/ 2650 2651 if (rank_prec_comm == master_proc) { 2652 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2653 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2654 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2655 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2656 } 2657 #endif 2658 break; 2659 } 2660 2661 case(PARALLEL_BDDC): 2662 2663 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2664 coarse_mat_type = MATAIJ; 2665 coarse_pc_type = PCREDUNDANT; 2666 coarse_ksp_type = KSPPREONLY; 2667 coarse_comm = prec_comm; 2668 active_rank = rank_prec_comm; 2669 break; 2670 2671 } 2672 2673 switch(pcbddc->coarse_communications_type){ 2674 2675 case(SCATTERS_BDDC): 2676 { 2677 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2678 2679 IS coarse_IS; 2680 2681 if(pcbddc->coarsening_ratio == 1) { 2682 ins_local_primal_size = pcbddc->local_primal_size; 2683 ins_local_primal_indices = pcbddc->local_primal_indices; 2684 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2685 /* nonzeros */ 2686 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2687 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2688 for (i=0;i<ins_local_primal_size;i++) { 2689 dnz[i] = ins_local_primal_size; 2690 } 2691 } else { 2692 PetscMPIInt send_size; 2693 PetscMPIInt *send_buffer; 2694 PetscInt *aux_ins_indices; 2695 PetscInt ii,jj; 2696 MPI_Request *requests; 2697 2698 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2699 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2700 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2701 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2702 pcbddc->replicated_primal_size = count_recv; 2703 j = 0; 2704 for (i=0;i<count_recv;i++) { 2705 pcbddc->local_primal_displacements[i] = j; 2706 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2707 } 2708 pcbddc->local_primal_displacements[count_recv] = j; 2709 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2710 /* allocate auxiliary space */ 2711 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2712 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2713 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2714 /* allocate stuffs for message massing */ 2715 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2716 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2717 /* send indices to be inserted */ 2718 for (i=0;i<count_recv;i++) { 2719 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2720 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2721 } 2722 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2723 send_size = pcbddc->local_primal_size; 2724 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2725 for (i=0;i<send_size;i++) { 2726 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2727 } 2728 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2729 } 2730 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2731 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2732 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2733 } 2734 j = 0; 2735 for (i=0;i<count_recv;i++) { 2736 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2737 localsizes2[i] = ii*ii; 2738 localdispl2[i] = j; 2739 j += localsizes2[i]; 2740 jj = pcbddc->local_primal_displacements[i]; 2741 /* it counts the coarse subdomains sharing the coarse node */ 2742 for (k=0;k<ii;k++) { 2743 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2744 } 2745 } 2746 /* temp_coarse_mat_vals used to store matrix values to be received */ 2747 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2748 /* evaluate how many values I will insert in coarse mat */ 2749 ins_local_primal_size = 0; 2750 for (i=0;i<coarse_size;i++) { 2751 if (aux_ins_indices[i]) { 2752 ins_local_primal_size++; 2753 } 2754 } 2755 /* evaluate indices I will insert in coarse mat */ 2756 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2757 j = 0; 2758 for(i=0;i<coarse_size;i++) { 2759 if(aux_ins_indices[i]) { 2760 ins_local_primal_indices[j] = i; 2761 j++; 2762 } 2763 } 2764 /* processes partecipating in coarse problem receive matrix data from their friends */ 2765 for (i=0;i<count_recv;i++) { 2766 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2767 } 2768 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2769 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2770 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2771 } 2772 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2773 /* nonzeros */ 2774 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2775 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2776 /* use aux_ins_indices to realize a global to local mapping */ 2777 j=0; 2778 for(i=0;i<coarse_size;i++){ 2779 if(aux_ins_indices[i]==0){ 2780 aux_ins_indices[i]=-1; 2781 } else { 2782 aux_ins_indices[i]=j; 2783 j++; 2784 } 2785 } 2786 for (i=0;i<count_recv;i++) { 2787 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2788 for (k=0;k<j;k++) { 2789 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2790 } 2791 } 2792 /* check */ 2793 for (i=0;i<ins_local_primal_size;i++) { 2794 if (dnz[i] > ins_local_primal_size) { 2795 dnz[i] = ins_local_primal_size; 2796 } 2797 } 2798 ierr = PetscFree(requests);CHKERRQ(ierr); 2799 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2800 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2801 } 2802 /* create local to global mapping needed by coarse MATIS */ 2803 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2804 coarse_comm = prec_comm; 2805 active_rank = rank_prec_comm; 2806 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2808 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2809 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2810 /* arrays for values insertion */ 2811 ins_local_primal_size = pcbddc->local_primal_size; 2812 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2813 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2814 for (j=0;j<ins_local_primal_size;j++){ 2815 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2816 for (i=0;i<ins_local_primal_size;i++) { 2817 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2818 } 2819 } 2820 } 2821 break; 2822 2823 } 2824 2825 } 2826 2827 /* Now create and fill up coarse matrix */ 2828 if ( rank_prec_comm == active_rank ) { 2829 2830 Mat matis_coarse_local_mat; 2831 2832 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2833 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2836 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2837 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2838 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2839 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2840 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2841 } else { 2842 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2843 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2844 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2845 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2846 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2847 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2848 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2849 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2850 } 2851 /* preallocation */ 2852 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2853 2854 PetscInt lrows,lcols,bs; 2855 2856 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2857 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2858 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 2859 2860 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2861 2862 Vec vec_dnz,vec_onz; 2863 PetscScalar *my_dnz,*my_onz,*array; 2864 PetscInt *mat_ranges,*row_ownership; 2865 PetscInt coarse_index_row,coarse_index_col,owner; 2866 2867 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 2868 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2869 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 2870 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 2871 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2872 2873 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 2874 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 2875 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2876 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2877 2878 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 2879 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2880 for (i=0;i<size_prec_comm;i++) { 2881 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2882 row_ownership[j]=i; 2883 } 2884 } 2885 2886 for (i=0;i<pcbddc->local_primal_size;i++) { 2887 coarse_index_row = pcbddc->local_primal_indices[i]; 2888 owner = row_ownership[coarse_index_row]; 2889 for (j=i;j<pcbddc->local_primal_size;j++) { 2890 owner = row_ownership[coarse_index_row]; 2891 coarse_index_col = pcbddc->local_primal_indices[j]; 2892 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 2893 my_dnz[i] += 1.0; 2894 } else { 2895 my_onz[i] += 1.0; 2896 } 2897 if (i != j) { 2898 owner = row_ownership[coarse_index_col]; 2899 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 2900 my_dnz[j] += 1.0; 2901 } else { 2902 my_onz[j] += 1.0; 2903 } 2904 } 2905 } 2906 } 2907 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2908 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2909 if (pcbddc->local_primal_size) { 2910 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2911 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2912 } 2913 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2914 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2915 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2916 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2917 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 2918 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2919 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2920 2921 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2922 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2923 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2924 2925 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2926 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2927 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2928 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2930 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2931 } 2932 2933 /* check */ 2934 for (i=0;i<lrows;i++) { 2935 if (dnz[i]>lcols) dnz[i]=lcols; 2936 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 2937 } 2938 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 2939 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 2940 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2941 } else { 2942 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 2943 ierr = PetscFree(dnz);CHKERRQ(ierr); 2944 } 2945 /* insert values */ 2946 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2947 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2948 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2949 if (pcbddc->coarsening_ratio == 1) { 2950 ins_coarse_mat_vals = coarse_submat_vals; 2951 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2952 } else { 2953 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 2954 for (k=0;k<pcbddc->replicated_primal_size;k++) { 2955 offset = pcbddc->local_primal_displacements[k]; 2956 offset2 = localdispl2[k]; 2957 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 2958 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2959 for (j=0;j<ins_local_primal_size;j++){ 2960 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 2961 } 2962 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 2963 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2964 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 2965 } 2966 } 2967 ins_local_primal_indices = 0; 2968 ins_coarse_mat_vals = 0; 2969 } 2970 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 /* symmetry of coarse matrix */ 2973 if (issym) { 2974 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2975 } 2976 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2977 } 2978 2979 /* create loc to glob scatters if needed */ 2980 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2981 IS local_IS,global_IS; 2982 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2983 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2984 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2985 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2986 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2987 } 2988 2989 /* free memory no longer needed */ 2990 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2991 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2992 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 2993 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 2994 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 2995 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 2996 2997 /* Compute coarse null space */ 2998 CoarseNullSpace = 0; 2999 if (pcbddc->NullSpace) { 3000 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3001 } 3002 3003 /* KSP for coarse problem */ 3004 if (rank_prec_comm == active_rank) { 3005 PetscBool isbddc=PETSC_FALSE; 3006 3007 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3008 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3009 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3010 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3011 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3013 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3014 /* Allow user's customization */ 3015 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3016 /* Set Up PC for coarse problem BDDC */ 3017 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3018 i = pcbddc->current_level+1; 3019 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3020 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3021 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3022 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3023 if (CoarseNullSpace) { 3024 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3025 } 3026 if (dbg_flag) { 3027 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3028 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3029 } 3030 } else { 3031 if (CoarseNullSpace) { 3032 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3033 } 3034 } 3035 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3036 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3037 3038 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3039 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3040 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3041 if (j == 1) { 3042 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3043 if (isbddc) { 3044 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3045 } 3046 } 3047 } 3048 /* Check coarse problem if requested */ 3049 if ( dbg_flag && rank_prec_comm == active_rank ) { 3050 KSP check_ksp; 3051 PC check_pc; 3052 Vec check_vec; 3053 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3054 KSPType check_ksp_type; 3055 3056 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3057 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3058 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3059 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3060 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3061 if (issym) check_ksp_type = KSPCG; 3062 else check_ksp_type = KSPGMRES; 3063 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3064 } else { 3065 check_ksp_type = KSPPREONLY; 3066 } 3067 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3068 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3069 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3070 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3071 /* create random vec */ 3072 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3073 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3074 if (CoarseNullSpace) { 3075 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3076 } 3077 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3078 /* solve coarse problem */ 3079 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3080 if (CoarseNullSpace) { 3081 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3082 } 3083 /* check coarse problem residual error */ 3084 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3085 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3086 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3087 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3088 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3089 /* get eigenvalue estimation if inexact */ 3090 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3091 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3092 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3093 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3094 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3095 } 3096 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3097 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3098 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3099 } 3100 if (dbg_flag) { 3101 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3102 } 3103 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3104 3105 PetscFunctionReturn(0); 3106 } 3107 3108