1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 110 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 113 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 114 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 117 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 118 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 119 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 120 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 121 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "PCBDDCCreateWorkVectors" 127 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 128 { 129 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 130 PC_IS *pcis = (PC_IS*)pc->data; 131 VecType impVecType; 132 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 137 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 138 local_primal_size = n_constraints+n_vertices; 139 n_R = pcis->n-n_vertices; 140 /* local work vectors */ 141 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 142 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 148 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 149 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 150 if (n_constraints) { 151 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 152 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 153 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 160 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 161 { 162 PetscErrorCode ierr; 163 /* pointers to pcis and pcbddc */ 164 PC_IS* pcis = (PC_IS*)pc->data; 165 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 166 /* submatrices of local problem */ 167 Mat A_RV,A_VR,A_VV; 168 /* working matrices */ 169 Mat M1,M2,M3,C_CR; 170 /* working vectors */ 171 Vec vec1_C,vec2_C,vec1_V,vec2_V; 172 /* additional working stuff */ 173 IS is_aux; 174 ISLocalToGlobalMapping BtoNmap; 175 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 176 const PetscScalar *array,*row_cmat_values; 177 const PetscInt *row_cmat_indices,*idx_R_local; 178 PetscInt *vertices,*idx_V_B,*auxindices; 179 PetscInt n_vertices,n_constraints,size_of_constraint; 180 PetscInt i,j,n_R,n_D,n_B; 181 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 182 /* Vector and matrix types */ 183 VecType impVecType; 184 MatType impMatType; 185 /* some shortcuts to scalars */ 186 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 187 /* for debugging purposes */ 188 PetscReal *coarsefunctions_errors,*constraints_errors; 189 190 PetscFunctionBegin; 191 /* get number of vertices and their local indices */ 192 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 193 n_constraints = pcbddc->local_primal_size-n_vertices; 194 /* Set Non-overlapping dimensions */ 195 n_B = pcis->n_B; n_D = pcis->n - n_B; 196 n_R = pcis->n-n_vertices; 197 198 /* Set types for local objects needed by BDDC precondtioner */ 199 impMatType = MATSEQDENSE; 200 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 201 202 /* Allocating some extra storage just to be safe */ 203 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 204 for (i=0;i<pcis->n;i++) auxindices[i]=i; 205 206 /* vertices in boundary numbering */ 207 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 209 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 210 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 211 if (i != n_vertices) { 212 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 213 } 214 215 /* some work vectors on vertices and/or constraints */ 216 if (n_vertices) { 217 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 218 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 219 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 220 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 221 } 222 if (n_constraints) { 223 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 224 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 225 } 226 227 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 228 if (n_constraints) { 229 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 230 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 231 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 232 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 233 234 /* Extract constraints on R nodes: C_{CR} */ 235 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 236 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 237 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 238 239 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 240 for (i=0;i<n_constraints;i++) { 241 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 242 /* Get row of constraint matrix in R numbering */ 243 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 244 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 245 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 246 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 247 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 248 /* Solve for row of constraint matrix in R numbering */ 249 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 250 /* Set values in local_auxmat2 */ 251 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 252 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 254 } 255 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 256 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 258 259 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 260 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 261 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 262 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 263 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 264 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 265 ierr = MatSetUp(M1);CHKERRQ(ierr); 266 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 267 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 268 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 269 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 271 ierr = MatDestroy(&M2);CHKERRQ(ierr); 272 ierr = MatDestroy(&M3);CHKERRQ(ierr); 273 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 274 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 275 } 276 277 /* Get submatrices from subdomain matrix */ 278 if (n_vertices) { 279 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 281 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 283 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 284 } 285 286 /* Matrix of coarse basis functions (local) */ 287 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 288 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 289 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 290 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 291 if (pcbddc->switch_static || pcbddc->dbg_flag) { 292 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 293 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 294 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 295 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 296 } 297 298 if (pcbddc->dbg_flag) { 299 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 300 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 301 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 302 } 303 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 304 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 305 306 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 307 308 /* vertices */ 309 for (i=0;i<n_vertices;i++) { 310 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 311 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 312 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 313 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 314 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 315 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 316 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 318 if (n_constraints) { 319 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 320 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 322 } 323 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 324 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 325 326 /* Set values in coarse basis function and subdomain part of coarse_mat */ 327 /* coarse basis functions */ 328 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 329 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 330 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 331 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 332 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 333 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 334 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 335 if (pcbddc->switch_static || pcbddc->dbg_flag) { 336 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 337 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 338 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 339 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 341 } 342 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 343 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 344 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 345 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 346 if (n_constraints) { 347 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 350 } 351 352 /* check */ 353 if (pcbddc->dbg_flag) { 354 /* assemble subdomain vector on local nodes */ 355 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 356 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 357 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 359 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 360 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 361 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 362 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 363 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 364 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 365 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 367 if (n_constraints) { 368 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 371 } 372 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 373 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 375 /* check saddle point solution */ 376 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 377 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 378 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 379 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 380 /* shift by the identity matrix */ 381 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 382 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 383 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 384 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 385 } 386 } 387 388 /* constraints */ 389 for (i=0;i<n_constraints;i++) { 390 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 391 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 392 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 393 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 394 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 395 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 396 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 397 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 398 if (n_vertices) { 399 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 400 } 401 /* Set values in coarse basis function and subdomain part of coarse_mat */ 402 /* coarse basis functions */ 403 j = i+n_vertices; /* don't touch this! */ 404 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 405 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 410 if (pcbddc->switch_static || pcbddc->dbg_flag) { 411 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 414 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 416 } 417 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 418 if (n_vertices) { 419 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 420 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 422 } 423 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 426 427 if (pcbddc->dbg_flag) { 428 /* assemble subdomain vector on nodes */ 429 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 435 /* assemble subdomain vector of lagrange multipliers */ 436 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 437 if (n_vertices) { 438 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 439 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 441 } 442 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 445 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 446 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 448 /* check saddle point solution */ 449 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 450 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 451 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 452 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 453 /* shift by the identity matrix */ 454 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 455 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 456 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 458 } 459 } 460 /* call assembling routines for local coarse basis */ 461 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 462 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 463 if (pcbddc->switch_static || pcbddc->dbg_flag) { 464 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 } 467 468 /* compute other basis functions for non-symmetric problems */ 469 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 470 if (!setsym || (setsym && !issym)) { 471 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 472 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 473 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 474 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 477 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 478 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 479 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 480 } 481 for (i=0;i<pcbddc->local_primal_size;i++) { 482 if (n_constraints) { 483 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 484 for (j=0;j<n_constraints;j++) { 485 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 486 } 487 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 488 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 489 } 490 if (i<n_vertices) { 491 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 492 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 493 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 494 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 495 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 496 if (n_constraints) { 497 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 498 } 499 } else { 500 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 501 } 502 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 503 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 504 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 507 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 509 if (i<n_vertices) { 510 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 511 } 512 if (pcbddc->switch_static || pcbddc->dbg_flag) { 513 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 514 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 515 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 516 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 518 } 519 520 if (pcbddc->dbg_flag) { 521 /* assemble subdomain vector on nodes */ 522 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 523 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 524 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 525 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 526 if (i<n_vertices) { 527 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 528 } 529 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 530 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 531 /* assemble subdomain vector of lagrange multipliers */ 532 for (j=0;j<pcbddc->local_primal_size;j++) { 533 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 534 } 535 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 537 /* check saddle point solution */ 538 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 539 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 540 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 541 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 542 /* shift by the identity matrix */ 543 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 544 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 545 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 546 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 547 } 548 } 549 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 if (pcbddc->switch_static || pcbddc->dbg_flag) { 552 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 } 555 } 556 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 557 /* Checking coarse_sub_mat and coarse basis functios */ 558 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 559 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 560 if (pcbddc->dbg_flag) { 561 Mat coarse_sub_mat; 562 Mat AUXMAT,TM1,TM2,TM3,TM4; 563 Mat coarse_phi_D,coarse_phi_B; 564 Mat coarse_psi_D,coarse_psi_B; 565 Mat A_II,A_BB,A_IB,A_BI; 566 MatType checkmattype=MATSEQAIJ; 567 PetscReal real_value; 568 569 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 570 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 571 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 572 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 573 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 574 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 575 if (pcbddc->coarse_psi_B) { 576 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 577 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 578 } 579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 580 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 584 if (pcbddc->coarse_psi_B) { 585 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 586 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 588 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 589 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 591 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 } else { 598 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 599 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 600 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 601 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 603 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 604 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 605 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 606 } 607 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 608 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 609 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 610 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 611 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 612 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 617 for (i=0;i<pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 621 for (i=0;i<pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 623 } 624 if (pcbddc->coarse_psi_B) { 625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 626 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 628 } 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 630 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 632 } 633 } 634 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 635 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 636 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 637 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 638 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 639 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 640 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 641 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 642 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 643 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 644 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 645 if (pcbddc->coarse_psi_B) { 646 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 647 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 648 } 649 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 650 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 651 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 652 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 653 } 654 /* free memory */ 655 if (n_vertices) { 656 ierr = PetscFree(vertices);CHKERRQ(ierr); 657 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 658 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 659 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 660 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 661 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 662 } 663 if (n_constraints) { 664 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 665 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 666 ierr = MatDestroy(&M1);CHKERRQ(ierr); 667 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 668 } 669 ierr = PetscFree(auxindices);CHKERRQ(ierr); 670 /* get back data */ 671 *coarse_submat_vals_n = coarse_submat_vals; 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 677 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 678 { 679 PC_IS* pcis = (PC_IS*)(pc->data); 680 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 681 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 682 /* manage repeated solves */ 683 MatReuse reuse; 684 MatStructure matstruct; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* get mat flags */ 689 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 690 reuse = MAT_INITIAL_MATRIX; 691 if (pc->setupcalled) { 692 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 693 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 694 if (matstruct == SAME_NONZERO_PATTERN) { 695 reuse = MAT_REUSE_MATRIX; 696 } else { 697 reuse = MAT_INITIAL_MATRIX; 698 } 699 } 700 if (reuse == MAT_INITIAL_MATRIX) { 701 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 702 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 705 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 706 } 707 708 /* transform local matrices if needed */ 709 if (pcbddc->use_change_of_basis) { 710 Mat change_mat_all; 711 PetscScalar *row_cmat_values; 712 PetscInt *row_cmat_indices; 713 PetscInt *nnz,*is_indices,*temp_indices; 714 PetscInt i,j,k,n_D,n_B; 715 716 /* Get Non-overlapping dimensions */ 717 n_B = pcis->n_B; 718 n_D = pcis->n-n_B; 719 720 /* compute nonzero structure of change of basis on all local nodes */ 721 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 722 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 723 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 724 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 725 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 k=1; 727 for (i=0;i<n_B;i++) { 728 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 729 nnz[is_indices[i]]=j; 730 if (k < j) k = j; 731 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 732 } 733 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 734 /* assemble change of basis matrix on the whole set of local dofs */ 735 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 736 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 737 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 738 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 739 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 740 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 for (i=0;i<n_D;i++) { 742 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 743 } 744 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 745 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 for (i=0;i<n_B;i++) { 747 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 748 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 749 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 750 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 751 } 752 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 753 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 754 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 755 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 756 if (i==1) { 757 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 758 } else { 759 Mat work_mat; 760 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 761 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 762 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 763 } 764 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 765 ierr = PetscFree(nnz);CHKERRQ(ierr); 766 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 767 } else { 768 /* without change of basis, the local matrix is unchanged */ 769 if (!pcbddc->local_mat) { 770 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 771 pcbddc->local_mat = matis->A; 772 } 773 } 774 775 /* get submatrices */ 776 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 777 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 778 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 779 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 785 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 786 { 787 PC_IS* pcis = (PC_IS*)(pc->data); 788 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 789 IS is_aux1,is_aux2; 790 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 791 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 792 PetscBT bitmask; 793 PetscErrorCode ierr; 794 795 PetscFunctionBegin; 796 /* Set Non-overlapping dimensions */ 797 n_B = pcis->n_B; n_D = pcis->n - n_B; 798 /* get vertex indices from constraint matrix */ 799 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 800 /* Set number of constraints */ 801 n_constraints = pcbddc->local_primal_size-n_vertices; 802 /* create auxiliary bitmask */ 803 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 804 for (i=0;i<n_vertices;i++) { 805 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 806 } 807 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 808 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 809 for (i=0, n_R=0; i<pcis->n; i++) { 810 if (!PetscBTLookup(bitmask,i)) { 811 idx_R_local[n_R] = i; 812 n_R++; 813 } 814 } 815 ierr = PetscFree(vertices);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 817 818 /* print some info if requested */ 819 if (pcbddc->dbg_flag) { 820 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 821 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 822 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 826 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 827 } 828 829 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 830 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 831 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 832 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 for (i=0; i<n_D; i++) { 834 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 835 } 836 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 837 for (i=0, j=0; i<n_R; i++) { 838 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 839 aux_array1[j++] = i; 840 } 841 } 842 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 843 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 844 for (i=0, j=0; i<n_B; i++) { 845 if (!PetscBTLookup(bitmask,is_indices[i])) { 846 aux_array2[j++] = i; 847 } 848 } 849 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 850 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 851 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 852 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 854 855 if (pcbddc->switch_static || pcbddc->dbg_flag) { 856 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 857 for (i=0, j=0; i<n_R; i++) { 858 if (PetscBTLookup(bitmask,idx_R_local[i])) { 859 aux_array1[j++] = i; 860 } 861 } 862 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 863 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 864 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 865 } 866 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 872 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 873 { 874 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 875 876 PetscFunctionBegin; 877 pcbddc->use_exact_dirichlet=use; 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 883 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 884 { 885 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 886 PC_IS *pcis = (PC_IS*)pc->data; 887 PC pc_temp; 888 Mat A_RR; 889 Vec vec1,vec2,vec3; 890 MatStructure matstruct; 891 PetscScalar m_one = -1.0; 892 PetscReal value; 893 PetscInt n_D,n_R,use_exact,use_exact_reduced; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 /* Creating PC contexts for local Dirichlet and Neumann problems */ 898 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 899 900 /* DIRICHLET PROBLEM */ 901 /* Matrix for Dirichlet problem is pcis->A_II */ 902 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 903 if (!pcbddc->ksp_D) { /* create object if not yet build */ 904 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 905 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 906 /* default */ 907 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 908 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 909 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 910 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 911 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 912 } 913 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 914 /* Allow user's customization */ 915 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 916 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 917 if (!n_D) { 918 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 919 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 920 } 921 /* Set Up KSP for Dirichlet problem of BDDC */ 922 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 923 /* set ksp_D into pcis data */ 924 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 925 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 926 pcis->ksp_D = pcbddc->ksp_D; 927 928 /* NEUMANN PROBLEM */ 929 /* Matrix for Neumann problem is A_RR -> we need to create it */ 930 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 931 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 932 if (!pcbddc->ksp_R) { /* create object if not yet build */ 933 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 934 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 935 /* default */ 936 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 937 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 938 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 939 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 940 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 941 } 942 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 943 /* Allow user's customization */ 944 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 945 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 946 if (!n_R) { 947 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 948 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 949 } 950 /* Set Up KSP for Neumann problem of BDDC */ 951 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 952 953 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 954 955 /* Dirichlet */ 956 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 957 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 958 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 959 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 960 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 961 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 962 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 963 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 966 /* need to be adapted? */ 967 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 968 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 969 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 970 /* print info */ 971 if (pcbddc->dbg_flag) { 972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 977 } 978 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 979 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 980 } 981 982 /* Neumann */ 983 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 984 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 985 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 986 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 987 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 988 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 989 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 990 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 991 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 992 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 993 /* need to be adapted? */ 994 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 995 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 996 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 997 /* print info */ 998 if (pcbddc->dbg_flag) { 999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1001 } 1002 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1003 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1004 } 1005 1006 /* free Neumann problem's matrix */ 1007 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1013 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1014 { 1015 PetscErrorCode ierr; 1016 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1017 1018 PetscFunctionBegin; 1019 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1020 if (pcbddc->local_auxmat1) { 1021 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1022 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1029 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1030 { 1031 PetscErrorCode ierr; 1032 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1033 PC_IS* pcis = (PC_IS*) (pc->data); 1034 const PetscScalar zero = 0.0; 1035 1036 PetscFunctionBegin; 1037 /* Application of PHI^T (or PSI^T) */ 1038 if (pcbddc->coarse_psi_B) { 1039 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1040 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1041 } else { 1042 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1043 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1044 } 1045 /* Scatter data of coarse_rhs */ 1046 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1047 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1048 1049 /* Local solution on R nodes */ 1050 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1051 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1052 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1053 if (pcbddc->switch_static) { 1054 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1055 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 } 1057 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1058 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1059 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 if (pcbddc->switch_static) { 1062 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 } 1065 1066 /* Coarse solution */ 1067 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1068 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1069 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1070 } 1071 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 1074 /* Sum contributions from two levels */ 1075 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1076 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1082 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1083 { 1084 PetscErrorCode ierr; 1085 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1086 1087 PetscFunctionBegin; 1088 switch (pcbddc->coarse_communications_type) { 1089 case SCATTERS_BDDC: 1090 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1091 break; 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1098 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1099 { 1100 PetscErrorCode ierr; 1101 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1102 1103 PetscFunctionBegin; 1104 switch (pcbddc->coarse_communications_type) { 1105 case SCATTERS_BDDC: 1106 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1107 break; 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* uncomment for testing purposes */ 1113 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1116 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1117 { 1118 PetscErrorCode ierr; 1119 PC_IS* pcis = (PC_IS*)(pc->data); 1120 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1121 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1122 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1123 MatType impMatType=MATSEQAIJ; 1124 /* one and zero */ 1125 PetscScalar one=1.0,zero=0.0; 1126 /* space to store constraints and their local indices */ 1127 PetscScalar *temp_quadrature_constraint; 1128 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1129 /* iterators */ 1130 PetscInt i,j,k,total_counts,temp_start_ptr; 1131 /* stuff to store connected components stored in pcbddc->mat_graph */ 1132 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1133 PetscInt n_ISForFaces,n_ISForEdges; 1134 /* near null space stuff */ 1135 MatNullSpace nearnullsp; 1136 const Vec *nearnullvecs; 1137 Vec *localnearnullsp; 1138 PetscBool nnsp_has_cnst; 1139 PetscInt nnsp_size; 1140 PetscScalar *array; 1141 /* BLAS integers */ 1142 PetscBLASInt lwork,lierr; 1143 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1144 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1145 /* LAPACK working arrays for SVD or POD */ 1146 PetscBool skip_lapack; 1147 PetscScalar *work; 1148 PetscReal *singular_vals; 1149 #if defined(PETSC_USE_COMPLEX) 1150 PetscReal *rwork; 1151 #endif 1152 #if defined(PETSC_MISSING_LAPACK_GESVD) 1153 PetscBLASInt Blas_one_2=1; 1154 PetscScalar *temp_basis,*correlation_mat; 1155 #else 1156 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1157 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1158 #endif 1159 /* change of basis */ 1160 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1161 PetscBool boolforchange; 1162 PetscBT touched,change_basis; 1163 /* auxiliary stuff */ 1164 PetscInt *nnz,*is_indices,*local_to_B; 1165 /* some quantities */ 1166 PetscInt n_vertices,total_primal_vertices; 1167 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1168 1169 1170 PetscFunctionBegin; 1171 /* Get index sets for faces, edges and vertices from graph */ 1172 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1173 pcbddc->use_vertices = PETSC_TRUE; 1174 } 1175 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1176 /* print some info */ 1177 if (pcbddc->dbg_flag) { 1178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1179 i = 0; 1180 if (ISForVertices) { 1181 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1182 } 1183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1184 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1187 } 1188 /* check if near null space is attached to global mat */ 1189 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1190 if (nearnullsp) { 1191 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1192 } else { /* if near null space is not provided BDDC uses constants by default */ 1193 nnsp_size = 0; 1194 nnsp_has_cnst = PETSC_TRUE; 1195 } 1196 /* get max number of constraints on a single cc */ 1197 max_constraints = nnsp_size; 1198 if (nnsp_has_cnst) max_constraints++; 1199 1200 /* 1201 Evaluate maximum storage size needed by the procedure 1202 - temp_indices will contain start index of each constraint stored as follows 1203 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1204 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1205 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1206 */ 1207 total_counts = n_ISForFaces+n_ISForEdges; 1208 total_counts *= max_constraints; 1209 n_vertices = 0; 1210 if (ISForVertices) { 1211 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1212 } 1213 total_counts += n_vertices; 1214 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1215 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1216 total_counts = 0; 1217 max_size_of_constraint = 0; 1218 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1219 if (i<n_ISForEdges) { 1220 used_IS = &ISForEdges[i]; 1221 } else { 1222 used_IS = &ISForFaces[i-n_ISForEdges]; 1223 } 1224 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1225 total_counts += j; 1226 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1227 } 1228 total_counts *= max_constraints; 1229 total_counts += n_vertices; 1230 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1231 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1232 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1233 /* local to boundary numbering */ 1234 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1235 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1236 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1237 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1238 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1239 /* get local part of global near null space vectors */ 1240 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1241 for (k=0;k<nnsp_size;k++) { 1242 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1243 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 } 1246 1247 /* whether or not to skip lapack calls */ 1248 skip_lapack = PETSC_TRUE; 1249 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1250 1251 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1252 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1253 PetscScalar temp_work; 1254 #if defined(PETSC_MISSING_LAPACK_GESVD) 1255 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1256 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1257 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1258 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1259 #if defined(PETSC_USE_COMPLEX) 1260 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1261 #endif 1262 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1263 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1264 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1265 lwork = -1; 1266 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1267 #if !defined(PETSC_USE_COMPLEX) 1268 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1269 #else 1270 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1271 #endif 1272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1273 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1274 #else /* on missing GESVD */ 1275 /* SVD */ 1276 PetscInt max_n,min_n; 1277 max_n = max_size_of_constraint; 1278 min_n = max_constraints; 1279 if (max_size_of_constraint < max_constraints) { 1280 min_n = max_size_of_constraint; 1281 max_n = max_constraints; 1282 } 1283 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_COMPLEX) 1285 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1286 #endif 1287 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1288 lwork = -1; 1289 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1290 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1291 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1292 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1293 #if !defined(PETSC_USE_COMPLEX) 1294 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1295 #else 1296 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1297 #endif 1298 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1299 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1300 #endif /* on missing GESVD */ 1301 /* Allocate optimal workspace */ 1302 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1303 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1304 } 1305 /* Now we can loop on constraining sets */ 1306 total_counts = 0; 1307 temp_indices[0] = 0; 1308 /* vertices */ 1309 if (ISForVertices) { 1310 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1311 if (nnsp_has_cnst) { /* consider all vertices */ 1312 for (i=0;i<n_vertices;i++) { 1313 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1314 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1315 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1316 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1317 total_counts++; 1318 } 1319 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1320 PetscBool used_vertex; 1321 for (i=0;i<n_vertices;i++) { 1322 used_vertex = PETSC_FALSE; 1323 k = 0; 1324 while (!used_vertex && k<nnsp_size) { 1325 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1326 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1327 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1328 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1329 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1330 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1331 total_counts++; 1332 used_vertex = PETSC_TRUE; 1333 } 1334 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1335 k++; 1336 } 1337 } 1338 } 1339 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1340 n_vertices = total_counts; 1341 } 1342 1343 /* edges and faces */ 1344 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1345 if (i<n_ISForEdges) { 1346 used_IS = &ISForEdges[i]; 1347 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1348 } else { 1349 used_IS = &ISForFaces[i-n_ISForEdges]; 1350 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1351 } 1352 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1353 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1354 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1355 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1356 /* change of basis should not be performed on local periodic nodes */ 1357 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1358 if (nnsp_has_cnst) { 1359 PetscScalar quad_value; 1360 temp_constraints++; 1361 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1362 for (j=0;j<size_of_constraint;j++) { 1363 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1364 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1365 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1366 } 1367 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1368 if (boolforchange) { 1369 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1370 } 1371 total_counts++; 1372 } 1373 for (k=0;k<nnsp_size;k++) { 1374 PetscReal real_value; 1375 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1376 for (j=0;j<size_of_constraint;j++) { 1377 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1378 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1379 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1380 } 1381 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1382 /* check if array is null on the connected component */ 1383 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1384 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1385 if (real_value > 0.0) { /* keep indices and values */ 1386 temp_constraints++; 1387 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1388 if (boolforchange) { 1389 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1390 } 1391 total_counts++; 1392 } 1393 } 1394 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1395 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1396 if (!pcbddc->use_nnsp_true) { 1397 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1398 1399 #if defined(PETSC_MISSING_LAPACK_GESVD) 1400 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1401 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1402 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1403 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1404 from that computed using LAPACKgesvd 1405 -> This is due to a different computation of eigenvectors in LAPACKheev 1406 -> The quality of the POD-computed basis will be the same */ 1407 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1408 /* Store upper triangular part of correlation matrix */ 1409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1410 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1411 for (j=0;j<temp_constraints;j++) { 1412 for (k=0;k<j+1;k++) { 1413 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1414 } 1415 } 1416 /* compute eigenvalues and eigenvectors of correlation matrix */ 1417 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1418 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1419 #if !defined(PETSC_USE_COMPLEX) 1420 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1421 #else 1422 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1423 #endif 1424 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1425 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1426 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1427 j=0; 1428 while (j < temp_constraints && singular_vals[j] < tol) j++; 1429 total_counts=total_counts-j; 1430 /* scale and copy POD basis into used quadrature memory */ 1431 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1432 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1433 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1434 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1435 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1437 if (j<temp_constraints) { 1438 PetscInt ii; 1439 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1440 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1441 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1442 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1443 for (k=0;k<temp_constraints-j;k++) { 1444 for (ii=0;ii<size_of_constraint;ii++) { 1445 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1446 } 1447 } 1448 } 1449 #else /* on missing GESVD */ 1450 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1453 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1454 #if !defined(PETSC_USE_COMPLEX) 1455 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1456 #else 1457 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1458 #endif 1459 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1460 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1461 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1462 k = temp_constraints; 1463 if (k > size_of_constraint) k = size_of_constraint; 1464 j = 0; 1465 while (j < k && singular_vals[k-j-1] < tol) j++; 1466 total_counts = total_counts-temp_constraints+k-j; 1467 #endif /* on missing GESVD */ 1468 } 1469 } 1470 /* free index sets of faces, edges and vertices */ 1471 for (i=0;i<n_ISForFaces;i++) { 1472 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1473 } 1474 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1475 for (i=0;i<n_ISForEdges;i++) { 1476 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1477 } 1478 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1479 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1480 1481 /* free workspace */ 1482 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1483 ierr = PetscFree(work);CHKERRQ(ierr); 1484 #if defined(PETSC_USE_COMPLEX) 1485 ierr = PetscFree(rwork);CHKERRQ(ierr); 1486 #endif 1487 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1488 #if defined(PETSC_MISSING_LAPACK_GESVD) 1489 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1490 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1491 #endif 1492 } 1493 for (k=0;k<nnsp_size;k++) { 1494 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1495 } 1496 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1497 1498 /* set quantities in pcbddc data structure */ 1499 /* n_vertices defines the number of subdomain corners in the primal space */ 1500 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1501 pcbddc->local_primal_size = total_counts; 1502 pcbddc->n_vertices = n_vertices; 1503 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1504 1505 /* Create constraint matrix */ 1506 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1507 /* so we need to set it up properly either with or without change of basis */ 1508 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1509 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1510 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1511 /* array to compute a local numbering of constraints : vertices first then constraints */ 1512 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1513 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1514 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1515 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1516 /* auxiliary stuff for basis change */ 1517 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1518 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1519 1520 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1521 total_primal_vertices=0; 1522 for (i=0;i<pcbddc->local_primal_size;i++) { 1523 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1524 if (size_of_constraint == 1) { 1525 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1526 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1527 aux_primal_minloc[total_primal_vertices]=0; 1528 total_primal_vertices++; 1529 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1530 PetscInt min_loc,min_index; 1531 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1532 /* find first untouched local node */ 1533 k = 0; 1534 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1535 min_index = global_indices[k]; 1536 min_loc = k; 1537 /* search the minimum among global nodes already untouched on the cc */ 1538 for (k=1;k<size_of_constraint;k++) { 1539 /* there can be more than one constraint on a single connected component */ 1540 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1541 min_index = global_indices[k]; 1542 min_loc = k; 1543 } 1544 } 1545 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1546 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1547 aux_primal_minloc[total_primal_vertices]=min_loc; 1548 total_primal_vertices++; 1549 } 1550 } 1551 /* free workspace */ 1552 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1553 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1554 /* permute indices in order to have a sorted set of vertices */ 1555 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1556 1557 /* nonzero structure of constraint matrix */ 1558 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1559 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1560 j=total_primal_vertices; 1561 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1562 if (!PetscBTLookup(change_basis,i)) { 1563 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1564 j++; 1565 } 1566 } 1567 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1568 ierr = PetscFree(nnz);CHKERRQ(ierr); 1569 /* set values in constraint matrix */ 1570 for (i=0;i<total_primal_vertices;i++) { 1571 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1572 } 1573 total_counts = total_primal_vertices; 1574 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1575 if (!PetscBTLookup(change_basis,i)) { 1576 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1577 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1578 total_counts++; 1579 } 1580 } 1581 /* assembling */ 1582 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 /* 1585 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1586 */ 1587 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1588 if (pcbddc->use_change_of_basis) { 1589 PetscBool qr_needed = PETSC_FALSE; 1590 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1591 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1592 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1593 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1594 /* work arrays */ 1595 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1596 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1597 /* nonzeros per row */ 1598 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1599 if (PetscBTLookup(change_basis,i)) { 1600 qr_needed = PETSC_TRUE; 1601 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1602 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1603 } 1604 } 1605 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1606 ierr = PetscFree(nnz);CHKERRQ(ierr); 1607 /* Set initial identity in the matrix */ 1608 for (i=0;i<pcis->n_B;i++) { 1609 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1610 } 1611 1612 /* Now we loop on the constraints which need a change of basis */ 1613 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1614 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1615 /* Change of basis matrix T computed via QR decomposition of constraints */ 1616 if (qr_needed) { 1617 /* dual and primal dofs on a single cc */ 1618 PetscInt dual_dofs,primal_dofs; 1619 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1620 PetscInt primal_counter; 1621 /* working stuff for GEQRF */ 1622 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1623 PetscBLASInt lqr_work; 1624 /* working stuff for UNGQR */ 1625 PetscScalar *gqr_work,lgqr_work_t; 1626 PetscBLASInt lgqr_work; 1627 /* working stuff for TRTRS */ 1628 PetscScalar *trs_rhs; 1629 PetscBLASInt Blas_NRHS; 1630 /* pointers for values insertion into change of basis matrix */ 1631 PetscInt *start_rows,*start_cols; 1632 PetscScalar *start_vals; 1633 /* working stuff for values insertion */ 1634 PetscBT is_primal; 1635 1636 /* space to store Q */ 1637 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1638 /* first we issue queries for optimal work */ 1639 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1640 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1641 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1642 lqr_work = -1; 1643 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1644 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1645 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1646 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1647 lgqr_work = -1; 1648 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1649 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1650 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1651 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1652 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1653 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1654 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1655 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1656 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1657 /* array to store scaling factors for reflectors */ 1658 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1659 /* array to store rhs and solution of triangular solver */ 1660 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1661 /* array to store whether a node is primal or not */ 1662 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1663 for (i=0;i<total_primal_vertices;i++) { 1664 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1665 } 1666 1667 /* allocating workspace for check */ 1668 if (pcbddc->dbg_flag) { 1669 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1671 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1672 } 1673 1674 /* loop on constraints and see whether or not they need a change of basis */ 1675 /* -> using implicit ordering contained in temp_indices data */ 1676 total_counts = pcbddc->n_vertices; 1677 primal_counter = total_counts; 1678 while (total_counts<pcbddc->local_primal_size) { 1679 primal_dofs = 1; 1680 if (PetscBTLookup(change_basis,total_counts)) { 1681 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1682 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1683 primal_dofs++; 1684 } 1685 /* get constraint info */ 1686 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1687 dual_dofs = size_of_constraint-primal_dofs; 1688 1689 /* copy quadrature constraints for change of basis check */ 1690 if (pcbddc->dbg_flag) { 1691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1692 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1693 } 1694 1695 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1696 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1697 1698 /* compute QR decomposition of constraints */ 1699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1700 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1702 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1703 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1704 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1705 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1706 1707 /* explictly compute R^-T */ 1708 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1709 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1710 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1711 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1713 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1714 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1715 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1716 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1717 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1718 1719 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1720 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1722 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1725 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1728 1729 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1730 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1731 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1733 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1734 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1735 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1736 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1738 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1739 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1740 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1741 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1742 1743 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1744 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1745 /* insert cols for primal dofs */ 1746 for (j=0;j<primal_dofs;j++) { 1747 start_vals = &qr_basis[j*size_of_constraint]; 1748 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1749 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1750 } 1751 /* insert cols for dual dofs */ 1752 for (j=0,k=0;j<dual_dofs;k++) { 1753 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1754 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1755 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1756 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1757 j++; 1758 } 1759 } 1760 1761 /* check change of basis */ 1762 if (pcbddc->dbg_flag) { 1763 PetscInt ii,jj; 1764 PetscBool valid_qr=PETSC_TRUE; 1765 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1766 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1767 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1768 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1769 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1771 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1772 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1773 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1774 for (jj=0;jj<size_of_constraint;jj++) { 1775 for (ii=0;ii<primal_dofs;ii++) { 1776 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1777 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1778 } 1779 } 1780 if (!valid_qr) { 1781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1782 for (jj=0;jj<size_of_constraint;jj++) { 1783 for (ii=0;ii<primal_dofs;ii++) { 1784 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1785 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1786 } 1787 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1788 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1789 } 1790 } 1791 } 1792 } else { 1793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1794 } 1795 } 1796 /* increment primal counter */ 1797 primal_counter += primal_dofs; 1798 } else { 1799 if (pcbddc->dbg_flag) { 1800 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1801 } 1802 } 1803 /* increment constraint counter total_counts */ 1804 total_counts += primal_dofs; 1805 } 1806 if (pcbddc->dbg_flag) { 1807 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1808 ierr = PetscFree(work);CHKERRQ(ierr); 1809 } 1810 /* free workspace */ 1811 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1812 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1813 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1814 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1815 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1816 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1817 } 1818 /* assembling */ 1819 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 /* 1822 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1823 */ 1824 } 1825 /* free workspace */ 1826 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1827 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1828 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1829 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1830 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1831 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1832 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1833 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1839 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1840 { 1841 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1842 PC_IS *pcis = (PC_IS*)pc->data; 1843 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1844 PetscInt bs,ierr,i,vertex_size; 1845 PetscViewer viewer=pcbddc->dbg_viewer; 1846 1847 PetscFunctionBegin; 1848 /* Init local Graph struct */ 1849 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1850 1851 /* Check validity of the csr graph passed in by the user */ 1852 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1853 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1854 } 1855 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1856 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1857 Mat mat_adj; 1858 const PetscInt *xadj,*adjncy; 1859 PetscBool flg_row=PETSC_TRUE; 1860 1861 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1862 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1863 if (!flg_row) { 1864 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1865 } 1866 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1867 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1868 if (!flg_row) { 1869 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1870 } 1871 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1872 } 1873 1874 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1875 vertex_size = 1; 1876 if (!pcbddc->n_ISForDofs) { 1877 IS *custom_ISForDofs; 1878 1879 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1880 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1881 for (i=0;i<bs;i++) { 1882 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1883 } 1884 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1885 /* remove my references to IS objects */ 1886 for (i=0;i<bs;i++) { 1887 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1888 } 1889 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1890 } else { /* mat block size as vertex size (used for elasticity) */ 1891 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1892 } 1893 1894 /* Setup of Graph */ 1895 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1896 1897 /* Graph's connected components analysis */ 1898 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1899 1900 /* print some info to stdout */ 1901 if (pcbddc->dbg_flag) { 1902 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1909 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1910 { 1911 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1912 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 n = 0; 1917 vertices = 0; 1918 if (pcbddc->ConstraintMatrix) { 1919 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1920 for (i=0;i<local_primal_size;i++) { 1921 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1922 if (size_of_constraint == 1) n++; 1923 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1924 } 1925 if (vertices_idx) { 1926 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1927 n = 0; 1928 for (i=0;i<local_primal_size;i++) { 1929 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1930 if (size_of_constraint == 1) { 1931 vertices[n++]=row_cmat_indices[0]; 1932 } 1933 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1934 } 1935 } 1936 } 1937 *n_vertices = n; 1938 if (vertices_idx) *vertices_idx = vertices; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1944 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 1945 { 1946 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1947 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1948 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1949 PetscBT touched; 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 n = 0; 1954 constraints_index = 0; 1955 if (pcbddc->ConstraintMatrix) { 1956 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1957 max_size_of_constraint = 0; 1958 for (i=0;i<local_primal_size;i++) { 1959 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1960 if (size_of_constraint > 1) { 1961 n++; 1962 } 1963 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1964 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1965 } 1966 if (constraints_idx) { 1967 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1968 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1969 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1970 n = 0; 1971 for (i=0;i<local_primal_size;i++) { 1972 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1973 if (size_of_constraint > 1) { 1974 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1975 /* find first untouched local node */ 1976 j = 0; 1977 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1978 min_index = row_cmat_global_indices[j]; 1979 min_loc = j; 1980 /* search the minimum among nodes not yet touched on the connected component 1981 since there can be more than one constraint on a single cc */ 1982 for (j=1;j<size_of_constraint;j++) { 1983 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1984 min_index = row_cmat_global_indices[j]; 1985 min_loc = j; 1986 } 1987 } 1988 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1989 constraints_index[n++] = row_cmat_indices[min_loc]; 1990 } 1991 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1992 } 1993 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1994 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1995 } 1996 } 1997 *n_constraints = n; 1998 if (constraints_idx) *constraints_idx = constraints_index; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /* the next two functions has been adapted from pcis.c */ 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "PCBDDCApplySchur" 2005 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2006 { 2007 PetscErrorCode ierr; 2008 PC_IS *pcis = (PC_IS*)(pc->data); 2009 2010 PetscFunctionBegin; 2011 if (!vec2_B) { vec2_B = v; } 2012 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2013 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2014 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2015 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2016 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2022 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2023 { 2024 PetscErrorCode ierr; 2025 PC_IS *pcis = (PC_IS*)(pc->data); 2026 2027 PetscFunctionBegin; 2028 if (!vec2_B) { vec2_B = v; } 2029 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2030 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2031 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2032 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2033 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "PCBDDCSubsetNumbering" 2039 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2040 { 2041 Vec local_vec,global_vec; 2042 IS seqis,paris; 2043 VecScatter scatter_ctx; 2044 PetscScalar *array; 2045 PetscInt *temp_global_dofs; 2046 PetscScalar globalsum; 2047 PetscInt i,j,s; 2048 PetscInt nlocals,first_index,old_index,max_local; 2049 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2050 PetscMPIInt *dof_sizes,*dof_displs; 2051 PetscBool first_found; 2052 PetscErrorCode ierr; 2053 2054 PetscFunctionBegin; 2055 /* mpi buffers */ 2056 MPI_Comm_size(comm,&size_prec_comm); 2057 MPI_Comm_rank(comm,&rank_prec_comm); 2058 j = ( !rank_prec_comm ? size_prec_comm : 0); 2059 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2060 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2061 /* get maximum size of subset */ 2062 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2063 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2064 max_local = 0; 2065 if (n_local_dofs) { 2066 max_local = temp_global_dofs[0]; 2067 for (i=1;i<n_local_dofs;i++) { 2068 if (max_local < temp_global_dofs[i] ) { 2069 max_local = temp_global_dofs[i]; 2070 } 2071 } 2072 } 2073 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2074 max_global++; 2075 max_local = 0; 2076 if (n_local_dofs) { 2077 max_local = local_dofs[0]; 2078 for (i=1;i<n_local_dofs;i++) { 2079 if (max_local < local_dofs[i] ) { 2080 max_local = local_dofs[i]; 2081 } 2082 } 2083 } 2084 max_local++; 2085 /* allocate workspace */ 2086 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2087 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2088 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2089 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2090 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2091 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2092 /* create scatter */ 2093 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2094 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2095 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2096 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2097 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2098 /* init array */ 2099 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2100 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2101 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2102 if (local_dofs_mult) { 2103 for (i=0;i<n_local_dofs;i++) { 2104 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2105 } 2106 } else { 2107 for (i=0;i<n_local_dofs;i++) { 2108 array[local_dofs[i]]=1.0; 2109 } 2110 } 2111 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2112 /* scatter into global vec and get total number of global dofs */ 2113 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2114 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2116 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2117 /* Fill global_vec with cumulative function for global numbering */ 2118 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2119 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2120 nlocals = 0; 2121 first_index = -1; 2122 first_found = PETSC_FALSE; 2123 for (i=0;i<s;i++) { 2124 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2125 first_found = PETSC_TRUE; 2126 first_index = i; 2127 } 2128 nlocals += (PetscInt)PetscRealPart(array[i]); 2129 } 2130 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2131 if (!rank_prec_comm) { 2132 dof_displs[0]=0; 2133 for (i=1;i<size_prec_comm;i++) { 2134 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2135 } 2136 } 2137 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2138 if (first_found) { 2139 array[first_index] += (PetscScalar)nlocals; 2140 old_index = first_index; 2141 for (i=first_index+1;i<s;i++) { 2142 if (PetscRealPart(array[i]) > 0.0) { 2143 array[i] += array[old_index]; 2144 old_index = i; 2145 } 2146 } 2147 } 2148 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2149 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2150 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2151 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2152 /* get global ordering of local dofs */ 2153 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2154 if (local_dofs_mult) { 2155 for (i=0;i<n_local_dofs;i++) { 2156 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2157 } 2158 } else { 2159 for (i=0;i<n_local_dofs;i++) { 2160 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2161 } 2162 } 2163 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2164 /* free workspace */ 2165 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2166 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2167 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2168 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2169 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2170 /* return pointer to global ordering of local dofs */ 2171 *global_numbering_subset = temp_global_dofs; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2177 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2178 { 2179 PetscInt i,j; 2180 PetscScalar *alphas; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 /* this implements stabilized Gram-Schmidt */ 2185 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2186 for (i=0;i<n;i++) { 2187 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2188 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2189 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2190 } 2191 ierr = PetscFree(alphas);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /* BDDC requires metis 5.0.1 for multilevel */ 2196 #if defined(PETSC_HAVE_METIS) 2197 #include "metis.h" 2198 #define MetisInt idx_t 2199 #define MetisScalar real_t 2200 #endif 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2204 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2205 { 2206 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2207 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2208 PC_IS *pcis = (PC_IS*)pc->data; 2209 MPI_Comm prec_comm; 2210 MPI_Comm coarse_comm; 2211 Mat coarse_mat; 2212 2213 PetscInt coarse_size; 2214 2215 MatNullSpace CoarseNullSpace; 2216 2217 /* common to all choiches */ 2218 PetscScalar *temp_coarse_mat_vals; 2219 PetscScalar *ins_coarse_mat_vals; 2220 PetscInt *ins_local_primal_indices; 2221 PetscMPIInt *localsizes2,*localdispl2; 2222 PetscMPIInt size_prec_comm; 2223 PetscMPIInt rank_prec_comm; 2224 PetscMPIInt active_rank=MPI_PROC_NULL; 2225 PetscMPIInt master_proc=0; 2226 PetscInt ins_local_primal_size; 2227 /* specific to MULTILEVEL_BDDC */ 2228 PetscMPIInt *ranks_recv=0; 2229 PetscMPIInt count_recv=0; 2230 PetscMPIInt rank_coarse_proc_send_to=-1; 2231 PetscMPIInt coarse_color = MPI_UNDEFINED; 2232 ISLocalToGlobalMapping coarse_ISLG; 2233 /* some other variables */ 2234 PetscErrorCode ierr; 2235 MatType coarse_mat_type; 2236 PCType coarse_pc_type; 2237 KSPType coarse_ksp_type; 2238 PC pc_temp; 2239 PetscInt i,j,k; 2240 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2241 /* verbose output viewer */ 2242 PetscViewer viewer=pcbddc->dbg_viewer; 2243 PetscInt dbg_flag=pcbddc->dbg_flag; 2244 2245 PetscInt offset,offset2; 2246 PetscMPIInt im_active,active_procs; 2247 PetscInt *dnz,*onz; 2248 2249 PetscBool setsym,issym=PETSC_FALSE; 2250 2251 PetscFunctionBegin; 2252 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2253 ins_local_primal_indices = 0; 2254 ins_coarse_mat_vals = 0; 2255 localsizes2 = 0; 2256 localdispl2 = 0; 2257 temp_coarse_mat_vals = 0; 2258 coarse_ISLG = 0; 2259 2260 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2261 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2262 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2263 2264 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 2265 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2266 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2267 } 2268 2269 /* Assign global numbering to coarse dofs */ 2270 { 2271 PetscInt *auxlocal_primal,*aux_idx; 2272 PetscMPIInt mpi_local_primal_size; 2273 PetscScalar coarsesum,*array; 2274 2275 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2276 2277 /* Construct needed data structures for message passing */ 2278 j = 0; 2279 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2280 j = size_prec_comm; 2281 } 2282 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2283 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2284 /* Gather local_primal_size information for all processes */ 2285 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2286 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2287 } else { 2288 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2289 } 2290 pcbddc->replicated_primal_size = 0; 2291 for (i=0; i<j; i++) { 2292 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2293 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2294 } 2295 2296 /* This code fragment assumes that the number of local constraints per connected component 2297 is not greater than the number of nodes defined for the connected component 2298 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2299 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2300 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2301 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2302 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2303 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2304 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2305 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2306 /* Compute number of coarse dofs */ 2307 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2308 2309 if (dbg_flag) { 2310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2311 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2312 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2313 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2314 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2315 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2316 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2317 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2318 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2319 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2321 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2322 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2323 for (i=0;i<pcis->n;i++) { 2324 if (array[i] == 1.0) { 2325 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2326 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2327 } 2328 } 2329 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2330 for (i=0;i<pcis->n;i++) { 2331 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2332 } 2333 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2334 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2335 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2336 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2337 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2338 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2339 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2340 } 2341 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2342 } 2343 2344 if (dbg_flag) { 2345 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2346 if (dbg_flag > 1) { 2347 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2350 for (i=0;i<pcbddc->local_primal_size;i++) { 2351 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2352 } 2353 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2354 } 2355 } 2356 2357 im_active = 0; 2358 if (pcis->n) im_active = 1; 2359 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2360 2361 /* adapt coarse problem type */ 2362 #if defined(PETSC_HAVE_METIS) 2363 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2364 if (pcbddc->current_level < pcbddc->max_levels) { 2365 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2366 if (dbg_flag) { 2367 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2368 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2369 } 2370 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2371 } 2372 } else { 2373 if (dbg_flag) { 2374 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2375 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2376 } 2377 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2378 } 2379 } 2380 #else 2381 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2382 #endif 2383 2384 switch(pcbddc->coarse_problem_type){ 2385 2386 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2387 { 2388 #if defined(PETSC_HAVE_METIS) 2389 /* we need additional variables */ 2390 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2391 MetisInt *metis_coarse_subdivision; 2392 MetisInt options[METIS_NOPTIONS]; 2393 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2394 PetscMPIInt procs_jumps_coarse_comm; 2395 PetscMPIInt *coarse_subdivision; 2396 PetscMPIInt *total_count_recv; 2397 PetscMPIInt *total_ranks_recv; 2398 PetscMPIInt *displacements_recv; 2399 PetscMPIInt *my_faces_connectivity; 2400 PetscMPIInt *petsc_faces_adjncy; 2401 MetisInt *faces_adjncy; 2402 MetisInt *faces_xadj; 2403 PetscMPIInt *number_of_faces; 2404 PetscMPIInt *faces_displacements; 2405 PetscInt *array_int; 2406 PetscMPIInt my_faces=0; 2407 PetscMPIInt total_faces=0; 2408 PetscInt ranks_stretching_ratio; 2409 2410 /* define some quantities */ 2411 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2412 coarse_mat_type = MATIS; 2413 coarse_pc_type = PCBDDC; 2414 coarse_ksp_type = KSPRICHARDSON; 2415 2416 /* details of coarse decomposition */ 2417 n_subdomains = active_procs; 2418 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2419 ranks_stretching_ratio = size_prec_comm/active_procs; 2420 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2421 2422 #if 0 2423 PetscMPIInt *old_ranks; 2424 PetscInt *new_ranks,*jj,*ii; 2425 MatPartitioning mat_part; 2426 IS coarse_new_decomposition,is_numbering; 2427 PetscViewer viewer_test; 2428 MPI_Comm test_coarse_comm; 2429 PetscMPIInt test_coarse_color; 2430 Mat mat_adj; 2431 /* Create new communicator for coarse problem splitting the old one */ 2432 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2433 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2434 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2435 test_coarse_comm = MPI_COMM_NULL; 2436 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2437 if (im_active) { 2438 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2439 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2440 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2441 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2442 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2443 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2444 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2445 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2446 k = pcis->n_neigh-1; 2447 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2448 ii[0]=0; 2449 ii[1]=k; 2450 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2451 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2452 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2453 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2454 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2455 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2456 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2457 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2458 printf("Setting Nparts %d\n",n_parts); 2459 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2460 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2461 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2462 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2463 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2464 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2465 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2466 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2467 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2468 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2469 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2470 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2471 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2472 } 2473 #endif 2474 2475 /* build CSR graph of subdomains' connectivity */ 2476 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2477 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2478 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2479 for (j=0;j<pcis->n_shared[i];j++){ 2480 array_int[ pcis->shared[i][j] ]+=1; 2481 } 2482 } 2483 for (i=1;i<pcis->n_neigh;i++){ 2484 for (j=0;j<pcis->n_shared[i];j++){ 2485 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2486 my_faces++; 2487 break; 2488 } 2489 } 2490 } 2491 2492 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2493 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2494 my_faces=0; 2495 for (i=1;i<pcis->n_neigh;i++){ 2496 for (j=0;j<pcis->n_shared[i];j++){ 2497 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2498 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2499 my_faces++; 2500 break; 2501 } 2502 } 2503 } 2504 if (rank_prec_comm == master_proc) { 2505 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2506 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2507 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2508 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2509 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2510 } 2511 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2512 if (rank_prec_comm == master_proc) { 2513 faces_xadj[0]=0; 2514 faces_displacements[0]=0; 2515 j=0; 2516 for (i=1;i<size_prec_comm+1;i++) { 2517 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2518 if (number_of_faces[i-1]) { 2519 j++; 2520 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2521 } 2522 } 2523 } 2524 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2525 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2526 ierr = PetscFree(array_int);CHKERRQ(ierr); 2527 if (rank_prec_comm == master_proc) { 2528 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2529 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2530 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2531 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2532 } 2533 2534 if ( rank_prec_comm == master_proc ) { 2535 2536 PetscInt heuristic_for_metis=3; 2537 2538 ncon=1; 2539 faces_nvtxs=n_subdomains; 2540 /* partition graoh induced by face connectivity */ 2541 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2542 ierr = METIS_SetDefaultOptions(options); 2543 /* we need a contiguous partition of the coarse mesh */ 2544 options[METIS_OPTION_CONTIG]=1; 2545 options[METIS_OPTION_NITER]=30; 2546 if (pcbddc->coarsening_ratio > 1) { 2547 if (n_subdomains>n_parts*heuristic_for_metis) { 2548 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2549 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2550 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2551 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2552 } else { 2553 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2554 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2555 } 2556 } else { 2557 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2558 } 2559 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2560 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2561 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2562 2563 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2564 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2565 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2566 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2567 } 2568 2569 /* Create new communicator for coarse problem splitting the old one */ 2570 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2571 coarse_color=0; /* for communicator splitting */ 2572 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2573 } 2574 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2575 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2576 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2577 2578 if ( coarse_color == 0 ) { 2579 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2580 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2581 } else { 2582 rank_coarse_comm = MPI_PROC_NULL; 2583 } 2584 2585 /* master proc take care of arranging and distributing coarse information */ 2586 if (rank_coarse_comm == master_proc) { 2587 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2588 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2589 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2590 /* some initializations */ 2591 displacements_recv[0]=0; 2592 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2593 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2594 for (j=0;j<size_coarse_comm;j++) { 2595 for (i=0;i<size_prec_comm;i++) { 2596 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2597 } 2598 } 2599 /* displacements needed for scatterv of total_ranks_recv */ 2600 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2601 2602 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2603 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2604 for (j=0;j<size_coarse_comm;j++) { 2605 for (i=0;i<size_prec_comm;i++) { 2606 if (coarse_subdivision[i]==j) { 2607 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2608 total_count_recv[j]+=1; 2609 } 2610 } 2611 } 2612 /*for (j=0;j<size_coarse_comm;j++) { 2613 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2614 for (i=0;i<total_count_recv[j];i++) { 2615 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2616 } 2617 printf("\n"); 2618 }*/ 2619 2620 /* identify new decomposition in terms of ranks in the old communicator */ 2621 for (i=0;i<n_subdomains;i++) { 2622 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2623 } 2624 /*printf("coarse_subdivision in old end new ranks\n"); 2625 for (i=0;i<size_prec_comm;i++) 2626 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2627 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2628 } else { 2629 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2630 } 2631 printf("\n");*/ 2632 } 2633 2634 /* Scatter new decomposition for send details */ 2635 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2636 /* Scatter receiving details to members of coarse decomposition */ 2637 if ( coarse_color == 0) { 2638 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2639 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2640 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2641 } 2642 2643 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2644 if (coarse_color == 0) { 2645 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2646 for (i=0;i<count_recv;i++) 2647 printf("%d ",ranks_recv[i]); 2648 printf("\n"); 2649 }*/ 2650 2651 if (rank_prec_comm == master_proc) { 2652 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2653 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2654 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2655 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2656 } 2657 #endif 2658 break; 2659 } 2660 2661 case(PARALLEL_BDDC): 2662 2663 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2664 coarse_mat_type = MATAIJ; 2665 coarse_pc_type = PCREDUNDANT; 2666 coarse_ksp_type = KSPPREONLY; 2667 coarse_comm = prec_comm; 2668 active_rank = rank_prec_comm; 2669 break; 2670 2671 } 2672 2673 switch(pcbddc->coarse_communications_type){ 2674 2675 case(SCATTERS_BDDC): 2676 { 2677 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2678 2679 IS coarse_IS; 2680 2681 if(pcbddc->coarsening_ratio == 1) { 2682 ins_local_primal_size = pcbddc->local_primal_size; 2683 ins_local_primal_indices = pcbddc->local_primal_indices; 2684 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2685 /* nonzeros */ 2686 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2687 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2688 for (i=0;i<ins_local_primal_size;i++) { 2689 dnz[i] = ins_local_primal_size; 2690 } 2691 } else { 2692 PetscMPIInt send_size; 2693 PetscMPIInt *send_buffer; 2694 PetscInt *aux_ins_indices; 2695 PetscInt ii,jj; 2696 MPI_Request *requests; 2697 2698 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2699 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2700 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2701 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2702 pcbddc->replicated_primal_size = count_recv; 2703 j = 0; 2704 for (i=0;i<count_recv;i++) { 2705 pcbddc->local_primal_displacements[i] = j; 2706 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2707 } 2708 pcbddc->local_primal_displacements[count_recv] = j; 2709 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2710 /* allocate auxiliary space */ 2711 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2712 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2713 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2714 /* allocate stuffs for message massing */ 2715 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2716 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2717 /* send indices to be inserted */ 2718 for (i=0;i<count_recv;i++) { 2719 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2720 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2721 } 2722 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2723 send_size = pcbddc->local_primal_size; 2724 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2725 for (i=0;i<send_size;i++) { 2726 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2727 } 2728 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2729 } 2730 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2731 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2732 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2733 } 2734 j = 0; 2735 for (i=0;i<count_recv;i++) { 2736 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2737 localsizes2[i] = ii*ii; 2738 localdispl2[i] = j; 2739 j += localsizes2[i]; 2740 jj = pcbddc->local_primal_displacements[i]; 2741 /* it counts the coarse subdomains sharing the coarse node */ 2742 for (k=0;k<ii;k++) { 2743 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2744 } 2745 } 2746 /* temp_coarse_mat_vals used to store matrix values to be received */ 2747 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2748 /* evaluate how many values I will insert in coarse mat */ 2749 ins_local_primal_size = 0; 2750 for (i=0;i<coarse_size;i++) { 2751 if (aux_ins_indices[i]) { 2752 ins_local_primal_size++; 2753 } 2754 } 2755 /* evaluate indices I will insert in coarse mat */ 2756 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2757 j = 0; 2758 for(i=0;i<coarse_size;i++) { 2759 if(aux_ins_indices[i]) { 2760 ins_local_primal_indices[j] = i; 2761 j++; 2762 } 2763 } 2764 /* processes partecipating in coarse problem receive matrix data from their friends */ 2765 for (i=0;i<count_recv;i++) { 2766 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2767 } 2768 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2769 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2770 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2771 } 2772 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2773 /* nonzeros */ 2774 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2775 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2776 /* use aux_ins_indices to realize a global to local mapping */ 2777 j=0; 2778 for(i=0;i<coarse_size;i++){ 2779 if(aux_ins_indices[i]==0){ 2780 aux_ins_indices[i]=-1; 2781 } else { 2782 aux_ins_indices[i]=j; 2783 j++; 2784 } 2785 } 2786 for (i=0;i<count_recv;i++) { 2787 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2788 for (k=0;k<j;k++) { 2789 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2790 } 2791 } 2792 /* check */ 2793 for (i=0;i<ins_local_primal_size;i++) { 2794 if (dnz[i] > ins_local_primal_size) { 2795 dnz[i] = ins_local_primal_size; 2796 } 2797 } 2798 ierr = PetscFree(requests);CHKERRQ(ierr); 2799 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2800 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2801 } 2802 /* create local to global mapping needed by coarse MATIS */ 2803 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2804 coarse_comm = prec_comm; 2805 active_rank = rank_prec_comm; 2806 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2808 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2809 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2810 /* arrays for values insertion */ 2811 ins_local_primal_size = pcbddc->local_primal_size; 2812 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2813 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2814 for (j=0;j<ins_local_primal_size;j++){ 2815 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2816 for (i=0;i<ins_local_primal_size;i++) { 2817 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2818 } 2819 } 2820 } 2821 break; 2822 2823 } 2824 2825 } 2826 2827 /* Now create and fill up coarse matrix */ 2828 if ( rank_prec_comm == active_rank ) { 2829 2830 Mat matis_coarse_local_mat; 2831 2832 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2833 ierr = MatCreate(coarse_comm,&coarse_mat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2835 ierr = MatSetType(coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2836 ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr); 2837 ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr); 2838 ierr = MatSetUp(coarse_mat);CHKERRQ(ierr); 2839 ierr = MatSetOption(coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2840 ierr = MatSetOption(coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2841 } else { 2842 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr); 2843 ierr = MatSetUp(coarse_mat);CHKERRQ(ierr); 2844 ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2845 ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr); 2846 ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr); 2847 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2848 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2849 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2850 } 2851 /* preallocation */ 2852 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2853 2854 PetscInt lrows,lcols,bs; 2855 2856 ierr = MatGetLocalSize(coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2857 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2858 ierr = MatGetBlockSize(coarse_mat,&bs);CHKERRQ(ierr); 2859 2860 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2861 2862 Vec vec_dnz,vec_onz; 2863 PetscScalar *my_dnz,*my_onz,*array; 2864 PetscInt *mat_ranges,*row_ownership; 2865 PetscInt coarse_index_row,coarse_index_col,owner; 2866 2867 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 2868 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2869 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 2870 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 2871 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2872 2873 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 2874 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 2875 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2876 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2877 2878 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 2879 ierr = MatGetOwnershipRanges(coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2880 for (i=0;i<size_prec_comm;i++) { 2881 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2882 row_ownership[j]=i; 2883 } 2884 } 2885 2886 for (i=0;i<pcbddc->local_primal_size;i++) { 2887 coarse_index_row = pcbddc->local_primal_indices[i]; 2888 owner = row_ownership[coarse_index_row]; 2889 for (j=i;j<pcbddc->local_primal_size;j++) { 2890 owner = row_ownership[coarse_index_row]; 2891 coarse_index_col = pcbddc->local_primal_indices[j]; 2892 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 2893 my_dnz[i] += 1.0; 2894 } else { 2895 my_onz[i] += 1.0; 2896 } 2897 if (i != j) { 2898 owner = row_ownership[coarse_index_col]; 2899 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 2900 my_dnz[j] += 1.0; 2901 } else { 2902 my_onz[j] += 1.0; 2903 } 2904 } 2905 } 2906 } 2907 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2908 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2909 if (pcbddc->local_primal_size) { 2910 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2911 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2912 } 2913 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2914 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2915 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2916 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2917 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 2918 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2919 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2920 2921 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2922 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2923 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2924 2925 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2926 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2927 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2928 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2930 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2931 } 2932 2933 /* check */ 2934 for (i=0;i<lrows;i++) { 2935 if (dnz[i]>lcols) dnz[i]=lcols; 2936 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 2937 } 2938 ierr = MatSeqAIJSetPreallocation(coarse_mat,0,dnz);CHKERRQ(ierr); 2939 ierr = MatMPIAIJSetPreallocation(coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 2940 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2941 } else { 2942 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 2943 ierr = PetscFree(dnz);CHKERRQ(ierr); 2944 } 2945 /* insert values */ 2946 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2947 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2948 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2949 if (pcbddc->coarsening_ratio == 1) { 2950 ins_coarse_mat_vals = coarse_submat_vals; 2951 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2952 } else { 2953 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 2954 for (k=0;k<pcbddc->replicated_primal_size;k++) { 2955 offset = pcbddc->local_primal_displacements[k]; 2956 offset2 = localdispl2[k]; 2957 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 2958 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2959 for (j=0;j<ins_local_primal_size;j++){ 2960 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 2961 } 2962 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 2963 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2964 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 2965 } 2966 } 2967 ins_local_primal_indices = 0; 2968 ins_coarse_mat_vals = 0; 2969 } 2970 ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 /* symmetry of coarse matrix */ 2973 if (issym) { 2974 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2975 } 2976 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2977 } 2978 2979 /* create loc to glob scatters if needed */ 2980 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2981 IS local_IS,global_IS; 2982 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2983 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2984 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2985 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2986 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2987 } 2988 2989 /* free memory no longer needed */ 2990 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2991 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2992 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 2993 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 2994 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 2995 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 2996 2997 /* Compute coarse null space */ 2998 CoarseNullSpace = 0; 2999 if (pcbddc->NullSpace) { 3000 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3001 } 3002 3003 /* KSP for coarse problem */ 3004 if (rank_prec_comm == active_rank) { 3005 PetscBool isbddc=PETSC_FALSE; 3006 3007 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3008 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3009 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3010 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3011 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3013 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3014 /* Allow user's customization */ 3015 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3016 /* Set Up PC for coarse problem BDDC */ 3017 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3018 i = pcbddc->current_level+1; 3019 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3020 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3021 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3022 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3023 if (CoarseNullSpace) { 3024 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3025 } 3026 if (dbg_flag) { 3027 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3028 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3029 } 3030 } else { 3031 if (CoarseNullSpace) { 3032 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3033 } 3034 } 3035 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3036 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3037 3038 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3039 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3040 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3041 if (j == 1) { 3042 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3043 if (isbddc) { 3044 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3045 } 3046 } 3047 } 3048 /* Check coarse problem if requested */ 3049 if ( dbg_flag && rank_prec_comm == active_rank ) { 3050 KSP check_ksp; 3051 PC check_pc; 3052 Vec check_vec; 3053 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3054 KSPType check_ksp_type; 3055 3056 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3057 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3058 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3059 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3060 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3061 if (issym) check_ksp_type = KSPCG; 3062 else check_ksp_type = KSPGMRES; 3063 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3064 } else { 3065 check_ksp_type = KSPPREONLY; 3066 } 3067 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3068 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3069 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3070 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3071 /* create random vec */ 3072 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3073 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3074 if (CoarseNullSpace) { 3075 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3076 } 3077 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3078 /* solve coarse problem */ 3079 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3080 if (CoarseNullSpace) { 3081 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3082 } 3083 /* check coarse problem residual error */ 3084 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3085 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3086 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3087 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3088 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3089 /* get eigenvalue estimation if inexact */ 3090 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3091 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3092 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3093 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3094 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3095 } 3096 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3097 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3098 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3099 } 3100 if (dbg_flag) { 3101 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3102 } 3103 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3104 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 3108