1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 106 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 108 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 110 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 113 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 117 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 118 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 119 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 120 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 121 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 122 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 #undef __FUNCT__ 127 #define __FUNCT__ "PCBDDCCreateWorkVectors" 128 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 129 { 130 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 131 PC_IS *pcis = (PC_IS*)pc->data; 132 VecType impVecType; 133 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 134 PetscErrorCode ierr; 135 136 PetscFunctionBegin; 137 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 138 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 139 local_primal_size = n_constraints+n_vertices; 140 n_R = pcis->n-n_vertices; 141 /* local work vectors */ 142 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 143 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 149 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 150 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 151 if (n_constraints) { 152 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 153 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 154 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 161 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 162 { 163 PetscErrorCode ierr; 164 /* pointers to pcis and pcbddc */ 165 PC_IS* pcis = (PC_IS*)pc->data; 166 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 167 /* submatrices of local problem */ 168 Mat A_RV,A_VR,A_VV; 169 /* working matrices */ 170 Mat M1,M2,M3,C_CR; 171 /* working vectors */ 172 Vec vec1_C,vec2_C,vec1_V,vec2_V; 173 /* additional working stuff */ 174 IS is_aux; 175 ISLocalToGlobalMapping BtoNmap; 176 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 177 const PetscScalar *array,*row_cmat_values; 178 const PetscInt *row_cmat_indices,*idx_R_local; 179 PetscInt *vertices,*idx_V_B,*auxindices; 180 PetscInt n_vertices,n_constraints,size_of_constraint; 181 PetscInt i,j,n_R,n_D,n_B; 182 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 183 /* Vector and matrix types */ 184 VecType impVecType; 185 MatType impMatType; 186 /* some shortcuts to scalars */ 187 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 188 /* for debugging purposes */ 189 PetscReal *coarsefunctions_errors,*constraints_errors; 190 191 PetscFunctionBegin; 192 /* get number of vertices and their local indices */ 193 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 194 n_constraints = pcbddc->local_primal_size-n_vertices; 195 /* Set Non-overlapping dimensions */ 196 n_B = pcis->n_B; n_D = pcis->n - n_B; 197 n_R = pcis->n-n_vertices; 198 199 /* Set types for local objects needed by BDDC precondtioner */ 200 impMatType = MATSEQDENSE; 201 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 202 203 /* Allocating some extra storage just to be safe */ 204 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 205 for (i=0;i<pcis->n;i++) auxindices[i]=i; 206 207 /* vertices in boundary numbering */ 208 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 209 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 210 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 211 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 212 if (i != n_vertices) { 213 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 214 } 215 216 /* some work vectors on vertices and/or constraints */ 217 if (n_vertices) { 218 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 219 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 220 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 221 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 222 } 223 if (n_constraints) { 224 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 225 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 226 } 227 228 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 229 if (n_constraints) { 230 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 231 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 232 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 233 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 234 235 /* Extract constraints on R nodes: C_{CR} */ 236 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 237 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 238 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 239 240 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 241 for (i=0;i<n_constraints;i++) { 242 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 243 /* Get row of constraint matrix in R numbering */ 244 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 245 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 246 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 247 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 248 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 249 /* Solve for row of constraint matrix in R numbering */ 250 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 251 /* Set values in local_auxmat2 */ 252 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 253 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 254 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 255 } 256 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 257 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 258 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 259 260 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 261 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 262 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 263 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 264 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 265 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 266 ierr = MatSetUp(M1);CHKERRQ(ierr); 267 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 268 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 269 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 270 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 274 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 275 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 276 } 277 278 /* Get submatrices from subdomain matrix */ 279 if (n_vertices) { 280 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 281 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 283 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 284 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 285 } 286 287 /* Matrix of coarse basis functions (local) */ 288 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 289 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 290 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 291 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 292 if (pcbddc->switch_static || pcbddc->dbg_flag) { 293 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 294 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 295 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 296 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 297 } 298 299 if (pcbddc->dbg_flag) { 300 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 301 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 303 } 304 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 305 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 306 307 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 308 309 /* vertices */ 310 for (i=0;i<n_vertices;i++) { 311 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 312 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 313 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 314 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 315 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 316 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 318 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 319 if (n_constraints) { 320 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 321 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 322 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 323 } 324 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 325 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 326 327 /* Set values in coarse basis function and subdomain part of coarse_mat */ 328 /* coarse basis functions */ 329 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 330 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 331 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 333 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 334 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 335 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 336 if (pcbddc->switch_static || pcbddc->dbg_flag) { 337 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 338 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 340 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 342 } 343 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 344 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 345 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 347 if (n_constraints) { 348 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 349 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 350 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 351 } 352 353 /* check */ 354 if (pcbddc->dbg_flag) { 355 /* assemble subdomain vector on local nodes */ 356 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 357 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 358 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 360 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 362 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 363 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 364 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 365 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 366 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 367 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 368 if (n_constraints) { 369 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 370 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 371 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 372 } 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 376 /* check saddle point solution */ 377 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 378 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 379 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 380 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 381 /* shift by the identity matrix */ 382 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 385 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 386 } 387 } 388 389 /* constraints */ 390 for (i=0;i<n_constraints;i++) { 391 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 392 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 393 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 394 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 395 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 396 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 397 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 399 if (n_vertices) { 400 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 401 } 402 /* Set values in coarse basis function and subdomain part of coarse_mat */ 403 /* coarse basis functions */ 404 j = i+n_vertices; /* don't touch this! */ 405 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 406 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 411 if (pcbddc->switch_static || pcbddc->dbg_flag) { 412 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 414 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 415 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 417 } 418 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 419 if (n_vertices) { 420 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 421 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 423 } 424 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 425 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 427 428 if (pcbddc->dbg_flag) { 429 /* assemble subdomain vector on nodes */ 430 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 431 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 434 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 435 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 436 /* assemble subdomain vector of lagrange multipliers */ 437 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 438 if (n_vertices) { 439 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 442 } 443 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 444 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 445 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 449 /* check saddle point solution */ 450 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 451 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 452 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 453 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 454 /* shift by the identity matrix */ 455 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 459 } 460 } 461 /* call assembling routines for local coarse basis */ 462 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 if (pcbddc->switch_static || pcbddc->dbg_flag) { 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 } 468 469 /* compute other basis functions for non-symmetric problems */ 470 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 471 if (!setsym || (setsym && !issym)) { 472 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 473 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 474 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 475 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 478 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 479 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 480 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 } 482 for (i=0;i<pcbddc->local_primal_size;i++) { 483 if (n_constraints) { 484 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 485 for (j=0;j<n_constraints;j++) { 486 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 487 } 488 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 489 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 490 } 491 if (i<n_vertices) { 492 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 493 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 496 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 497 if (n_constraints) { 498 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 499 } 500 } else { 501 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 504 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 505 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 508 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 510 if (i<n_vertices) { 511 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 512 } 513 if (pcbddc->switch_static || pcbddc->dbg_flag) { 514 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 515 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 516 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 517 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 518 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 519 } 520 521 if (pcbddc->dbg_flag) { 522 /* assemble subdomain vector on nodes */ 523 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 524 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 525 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 527 if (i<n_vertices) { 528 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 532 /* assemble subdomain vector of lagrange multipliers */ 533 for (j=0;j<pcbddc->local_primal_size;j++) { 534 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 535 } 536 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 538 /* check saddle point solution */ 539 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 540 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 541 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 542 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 543 /* shift by the identity matrix */ 544 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 545 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 546 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 547 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 548 } 549 } 550 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 if (pcbddc->switch_static || pcbddc->dbg_flag) { 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } 556 } 557 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 558 /* Checking coarse_sub_mat and coarse basis functios */ 559 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 560 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 561 if (pcbddc->dbg_flag) { 562 Mat coarse_sub_mat; 563 Mat AUXMAT,TM1,TM2,TM3,TM4; 564 Mat coarse_phi_D,coarse_phi_B; 565 Mat coarse_psi_D,coarse_psi_B; 566 Mat A_II,A_BB,A_IB,A_BI; 567 MatType checkmattype=MATSEQAIJ; 568 PetscReal real_value; 569 570 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 571 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 572 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 573 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 574 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 575 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 576 if (pcbddc->coarse_psi_B) { 577 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 578 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 579 } 580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 581 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 584 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 585 if (pcbddc->coarse_psi_B) { 586 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 593 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 594 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 595 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 } else { 599 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 601 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 } 608 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 609 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 610 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 611 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 612 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 613 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 618 for (i=0;i<pcbddc->local_primal_size;i++) { 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 622 for (i=0;i<pcbddc->local_primal_size;i++) { 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 624 } 625 if (pcbddc->coarse_psi_B) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 627 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 629 } 630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 631 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 633 } 634 } 635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 636 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 637 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 638 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 640 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 641 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 642 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 644 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 645 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 646 if (pcbddc->coarse_psi_B) { 647 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 648 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 649 } 650 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 651 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 652 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 653 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 654 } 655 /* free memory */ 656 if (n_vertices) { 657 ierr = PetscFree(vertices);CHKERRQ(ierr); 658 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 659 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 660 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 661 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 662 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 663 } 664 if (n_constraints) { 665 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 666 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 667 ierr = MatDestroy(&M1);CHKERRQ(ierr); 668 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 669 } 670 ierr = PetscFree(auxindices);CHKERRQ(ierr); 671 /* get back data */ 672 *coarse_submat_vals_n = coarse_submat_vals; 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 678 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 679 { 680 PC_IS* pcis = (PC_IS*)(pc->data); 681 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 682 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 683 /* manage repeated solves */ 684 MatReuse reuse; 685 MatStructure matstruct; 686 PetscErrorCode ierr; 687 688 PetscFunctionBegin; 689 /* get mat flags */ 690 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 691 reuse = MAT_INITIAL_MATRIX; 692 if (pc->setupcalled) { 693 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 694 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 695 if (matstruct == SAME_NONZERO_PATTERN) { 696 reuse = MAT_REUSE_MATRIX; 697 } else { 698 reuse = MAT_INITIAL_MATRIX; 699 } 700 } 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 704 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 705 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 707 } 708 709 /* transform local matrices if needed */ 710 if (pcbddc->use_change_of_basis) { 711 Mat change_mat_all; 712 PetscScalar *row_cmat_values; 713 PetscInt *row_cmat_indices; 714 PetscInt *nnz,*is_indices,*temp_indices; 715 PetscInt i,j,k,n_D,n_B; 716 717 /* Get Non-overlapping dimensions */ 718 n_B = pcis->n_B; 719 n_D = pcis->n-n_B; 720 721 /* compute nonzero structure of change of basis on all local nodes */ 722 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 725 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 727 k=1; 728 for (i=0;i<n_B;i++) { 729 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 730 nnz[is_indices[i]]=j; 731 if (k < j) k = j; 732 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 733 } 734 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 735 /* assemble change of basis matrix on the whole set of local dofs */ 736 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 737 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 738 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 739 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 740 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 for (i=0;i<n_D;i++) { 743 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 744 } 745 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 747 for (i=0;i<n_B;i++) { 748 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 749 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 750 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 751 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 752 } 753 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 754 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 755 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 756 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 757 if (i==1) { 758 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 759 } else { 760 Mat work_mat; 761 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 762 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 763 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 764 } 765 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 766 ierr = PetscFree(nnz);CHKERRQ(ierr); 767 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 768 } else { 769 /* without change of basis, the local matrix is unchanged */ 770 if (!pcbddc->local_mat) { 771 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 772 pcbddc->local_mat = matis->A; 773 } 774 } 775 776 /* get submatrices */ 777 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 778 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 779 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 780 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 786 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 787 { 788 PC_IS* pcis = (PC_IS*)(pc->data); 789 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 790 IS is_aux1,is_aux2; 791 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 792 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 793 PetscBT bitmask; 794 PetscErrorCode ierr; 795 796 PetscFunctionBegin; 797 /* Set Non-overlapping dimensions */ 798 n_B = pcis->n_B; n_D = pcis->n - n_B; 799 /* get vertex indices from constraint matrix */ 800 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 801 /* Set number of constraints */ 802 n_constraints = pcbddc->local_primal_size-n_vertices; 803 /* create auxiliary bitmask */ 804 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 805 for (i=0;i<n_vertices;i++) { 806 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 807 } 808 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 809 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 810 for (i=0, n_R=0; i<pcis->n; i++) { 811 if (!PetscBTLookup(bitmask,i)) { 812 idx_R_local[n_R] = i; 813 n_R++; 814 } 815 } 816 ierr = PetscFree(vertices);CHKERRQ(ierr); 817 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 818 819 /* print some info if requested */ 820 if (pcbddc->dbg_flag) { 821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 827 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 828 } 829 830 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 831 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 832 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 833 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 834 for (i=0; i<n_D; i++) { 835 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 836 } 837 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 838 for (i=0, j=0; i<n_R; i++) { 839 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 840 aux_array1[j++] = i; 841 } 842 } 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 844 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 845 for (i=0, j=0; i<n_B; i++) { 846 if (!PetscBTLookup(bitmask,is_indices[i])) { 847 aux_array2[j++] = i; 848 } 849 } 850 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 851 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 852 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 853 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 854 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 855 856 if (pcbddc->switch_static || pcbddc->dbg_flag) { 857 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 858 for (i=0, j=0; i<n_R; i++) { 859 if (PetscBTLookup(bitmask,idx_R_local[i])) { 860 aux_array1[j++] = i; 861 } 862 } 863 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 864 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 865 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 866 } 867 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 873 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 874 { 875 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 876 877 PetscFunctionBegin; 878 pcbddc->use_exact_dirichlet=use; 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 884 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 885 { 886 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 887 PC_IS *pcis = (PC_IS*)pc->data; 888 PC pc_temp; 889 Mat A_RR; 890 Vec vec1,vec2,vec3; 891 MatStructure matstruct; 892 PetscScalar m_one = -1.0; 893 PetscReal value; 894 PetscInt n_D,n_R,use_exact,use_exact_reduced; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 /* Creating PC contexts for local Dirichlet and Neumann problems */ 899 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 900 901 /* DIRICHLET PROBLEM */ 902 /* Matrix for Dirichlet problem is pcis->A_II */ 903 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 904 if (!pcbddc->ksp_D) { /* create object if not yet build */ 905 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 906 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 907 /* default */ 908 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 909 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 910 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 911 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 912 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 913 } 914 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 915 /* Allow user's customization */ 916 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 917 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 918 if (!n_D) { 919 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 920 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 921 } 922 /* Set Up KSP for Dirichlet problem of BDDC */ 923 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 924 /* set ksp_D into pcis data */ 925 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 926 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 927 pcis->ksp_D = pcbddc->ksp_D; 928 929 /* NEUMANN PROBLEM */ 930 /* Matrix for Neumann problem is A_RR -> we need to create it */ 931 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 932 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 933 if (!pcbddc->ksp_R) { /* create object if not yet build */ 934 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 935 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 936 /* default */ 937 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 938 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 940 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 941 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 942 } 943 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 944 /* Allow user's customization */ 945 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 947 if (!n_R) { 948 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 950 } 951 /* Set Up KSP for Neumann problem of BDDC */ 952 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 953 954 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 955 956 /* Dirichlet */ 957 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 958 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 959 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 960 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 961 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 962 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 963 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 966 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 967 /* need to be adapted? */ 968 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 970 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 971 /* print info */ 972 if (pcbddc->dbg_flag) { 973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 978 } 979 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 980 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 981 } 982 983 /* Neumann */ 984 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 985 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 986 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 987 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 988 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 989 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 990 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 991 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 992 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 993 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 994 /* need to be adapted? */ 995 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 996 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 997 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 998 /* print info */ 999 if (pcbddc->dbg_flag) { 1000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1002 } 1003 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1004 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1005 } 1006 1007 /* free Neumann problem's matrix */ 1008 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1014 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1015 { 1016 PetscErrorCode ierr; 1017 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1018 1019 PetscFunctionBegin; 1020 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1021 if (pcbddc->local_auxmat1) { 1022 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1023 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1024 } 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1030 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1031 { 1032 PetscErrorCode ierr; 1033 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1034 PC_IS* pcis = (PC_IS*) (pc->data); 1035 const PetscScalar zero = 0.0; 1036 1037 PetscFunctionBegin; 1038 /* Application of PHI^T (or PSI^T) */ 1039 if (pcbddc->coarse_psi_B) { 1040 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1041 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1042 } else { 1043 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1044 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1045 } 1046 /* Scatter data of coarse_rhs */ 1047 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1048 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 1050 /* Local solution on R nodes */ 1051 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1054 if (pcbddc->switch_static) { 1055 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1057 } 1058 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1059 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1060 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 if (pcbddc->switch_static) { 1063 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1064 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065 } 1066 1067 /* Coarse solution */ 1068 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1070 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1071 } 1072 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1074 1075 /* Sum contributions from two levels */ 1076 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1077 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1083 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1084 { 1085 PetscErrorCode ierr; 1086 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1087 1088 PetscFunctionBegin; 1089 switch (pcbddc->coarse_communications_type) { 1090 case SCATTERS_BDDC: 1091 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1092 break; 1093 case GATHERS_BDDC: 1094 break; 1095 } 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1101 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1102 { 1103 PetscErrorCode ierr; 1104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1105 PetscScalar* array_to; 1106 PetscScalar* array_from; 1107 MPI_Comm comm; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1112 switch (pcbddc->coarse_communications_type) { 1113 case SCATTERS_BDDC: 1114 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1115 break; 1116 case GATHERS_BDDC: 1117 if (vec_from) { 1118 ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); 1119 } 1120 if (vec_to) { 1121 ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); 1122 } 1123 switch(pcbddc->coarse_problem_type){ 1124 case SEQUENTIAL_BDDC: 1125 if (smode == SCATTER_FORWARD) { 1126 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1127 if (vec_to) { 1128 if (imode == ADD_VALUES) { 1129 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1130 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1131 } 1132 } else { 1133 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1134 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1135 } 1136 } 1137 } 1138 } else { 1139 if (vec_from) { 1140 if (imode == ADD_VALUES) { 1141 MPI_Comm vec_from_comm; 1142 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr); 1143 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type); 1144 } 1145 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1146 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1147 } 1148 } 1149 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1150 } 1151 break; 1152 case REPLICATED_BDDC: 1153 if (smode == SCATTER_FORWARD) { 1154 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1155 if (imode == ADD_VALUES) { 1156 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1157 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1158 } 1159 } else { 1160 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1161 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1162 } 1163 } 1164 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1165 if (imode == ADD_VALUES) { 1166 for (i=0;i<pcbddc->local_primal_size;i++) { 1167 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 1168 } 1169 } else { 1170 for (i=0;i<pcbddc->local_primal_size;i++) { 1171 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1172 } 1173 } 1174 } 1175 break; 1176 case MULTILEVEL_BDDC: 1177 break; 1178 case PARALLEL_BDDC: 1179 break; 1180 } 1181 if (vec_from) { 1182 ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); 1183 } 1184 if (vec_to) { 1185 ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); 1186 } 1187 break; 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* uncomment for testing purposes */ 1193 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1196 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1197 { 1198 PetscErrorCode ierr; 1199 PC_IS* pcis = (PC_IS*)(pc->data); 1200 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1201 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1202 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1203 MatType impMatType=MATSEQAIJ; 1204 /* one and zero */ 1205 PetscScalar one=1.0,zero=0.0; 1206 /* space to store constraints and their local indices */ 1207 PetscScalar *temp_quadrature_constraint; 1208 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1209 /* iterators */ 1210 PetscInt i,j,k,total_counts,temp_start_ptr; 1211 /* stuff to store connected components stored in pcbddc->mat_graph */ 1212 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1213 PetscInt n_ISForFaces,n_ISForEdges; 1214 /* near null space stuff */ 1215 MatNullSpace nearnullsp; 1216 const Vec *nearnullvecs; 1217 Vec *localnearnullsp; 1218 PetscBool nnsp_has_cnst; 1219 PetscInt nnsp_size; 1220 PetscScalar *array; 1221 /* BLAS integers */ 1222 PetscBLASInt lwork,lierr; 1223 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1224 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1225 /* LAPACK working arrays for SVD or POD */ 1226 PetscBool skip_lapack; 1227 PetscScalar *work; 1228 PetscReal *singular_vals; 1229 #if defined(PETSC_USE_COMPLEX) 1230 PetscReal *rwork; 1231 #endif 1232 #if defined(PETSC_MISSING_LAPACK_GESVD) 1233 PetscBLASInt Blas_one_2=1; 1234 PetscScalar *temp_basis,*correlation_mat; 1235 #else 1236 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1237 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1238 #endif 1239 /* change of basis */ 1240 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1241 PetscBool boolforchange; 1242 PetscBT touched,change_basis; 1243 /* auxiliary stuff */ 1244 PetscInt *nnz,*is_indices,*local_to_B; 1245 /* some quantities */ 1246 PetscInt n_vertices,total_primal_vertices; 1247 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1248 1249 1250 PetscFunctionBegin; 1251 /* Get index sets for faces, edges and vertices from graph */ 1252 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1253 pcbddc->use_vertices = PETSC_TRUE; 1254 } 1255 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1256 /* print some info */ 1257 if (pcbddc->dbg_flag) { 1258 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1259 i = 0; 1260 if (ISForVertices) { 1261 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1267 } 1268 /* check if near null space is attached to global mat */ 1269 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1270 if (nearnullsp) { 1271 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1272 } else { /* if near null space is not provided BDDC uses constants by default */ 1273 nnsp_size = 0; 1274 nnsp_has_cnst = PETSC_TRUE; 1275 } 1276 /* get max number of constraints on a single cc */ 1277 max_constraints = nnsp_size; 1278 if (nnsp_has_cnst) max_constraints++; 1279 1280 /* 1281 Evaluate maximum storage size needed by the procedure 1282 - temp_indices will contain start index of each constraint stored as follows 1283 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1284 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1285 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1286 */ 1287 total_counts = n_ISForFaces+n_ISForEdges; 1288 total_counts *= max_constraints; 1289 n_vertices = 0; 1290 if (ISForVertices) { 1291 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1292 } 1293 total_counts += n_vertices; 1294 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1295 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1296 total_counts = 0; 1297 max_size_of_constraint = 0; 1298 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1299 if (i<n_ISForEdges) { 1300 used_IS = &ISForEdges[i]; 1301 } else { 1302 used_IS = &ISForFaces[i-n_ISForEdges]; 1303 } 1304 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1305 total_counts += j; 1306 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1307 } 1308 total_counts *= max_constraints; 1309 total_counts += n_vertices; 1310 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1311 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1312 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1313 /* local to boundary numbering */ 1314 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1315 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1316 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1317 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1318 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1319 /* get local part of global near null space vectors */ 1320 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1321 for (k=0;k<nnsp_size;k++) { 1322 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1323 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 } 1326 1327 /* whether or not to skip lapack calls */ 1328 skip_lapack = PETSC_TRUE; 1329 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1330 1331 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1332 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1333 PetscScalar temp_work; 1334 #if defined(PETSC_MISSING_LAPACK_GESVD) 1335 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1336 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1337 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1338 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1339 #if defined(PETSC_USE_COMPLEX) 1340 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1341 #endif 1342 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1343 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1344 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1345 lwork = -1; 1346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1347 #if !defined(PETSC_USE_COMPLEX) 1348 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1349 #else 1350 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1351 #endif 1352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1354 #else /* on missing GESVD */ 1355 /* SVD */ 1356 PetscInt max_n,min_n; 1357 max_n = max_size_of_constraint; 1358 min_n = max_constraints; 1359 if (max_size_of_constraint < max_constraints) { 1360 min_n = max_size_of_constraint; 1361 max_n = max_constraints; 1362 } 1363 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1364 #if defined(PETSC_USE_COMPLEX) 1365 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1366 #endif 1367 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1368 lwork = -1; 1369 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1370 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1371 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1372 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1373 #if !defined(PETSC_USE_COMPLEX) 1374 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1375 #else 1376 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1377 #endif 1378 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1380 #endif /* on missing GESVD */ 1381 /* Allocate optimal workspace */ 1382 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1383 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1384 } 1385 /* Now we can loop on constraining sets */ 1386 total_counts = 0; 1387 temp_indices[0] = 0; 1388 /* vertices */ 1389 if (ISForVertices) { 1390 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1391 if (nnsp_has_cnst) { /* consider all vertices */ 1392 for (i=0;i<n_vertices;i++) { 1393 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1394 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1395 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1396 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1397 total_counts++; 1398 } 1399 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1400 PetscBool used_vertex; 1401 for (i=0;i<n_vertices;i++) { 1402 used_vertex = PETSC_FALSE; 1403 k = 0; 1404 while (!used_vertex && k<nnsp_size) { 1405 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1406 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1407 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1408 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1409 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1410 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1411 total_counts++; 1412 used_vertex = PETSC_TRUE; 1413 } 1414 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1415 k++; 1416 } 1417 } 1418 } 1419 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1420 n_vertices = total_counts; 1421 } 1422 1423 /* edges and faces */ 1424 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1425 if (i<n_ISForEdges) { 1426 used_IS = &ISForEdges[i]; 1427 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1428 } else { 1429 used_IS = &ISForFaces[i-n_ISForEdges]; 1430 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1431 } 1432 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1433 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1434 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1435 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 /* change of basis should not be performed on local periodic nodes */ 1437 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1438 if (nnsp_has_cnst) { 1439 PetscScalar quad_value; 1440 temp_constraints++; 1441 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1442 for (j=0;j<size_of_constraint;j++) { 1443 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1444 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1445 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1446 } 1447 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1448 if (boolforchange) { 1449 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1450 } 1451 total_counts++; 1452 } 1453 for (k=0;k<nnsp_size;k++) { 1454 PetscReal real_value; 1455 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1456 for (j=0;j<size_of_constraint;j++) { 1457 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1458 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1459 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1460 } 1461 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1462 /* check if array is null on the connected component */ 1463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1464 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1465 if (real_value > 0.0) { /* keep indices and values */ 1466 temp_constraints++; 1467 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1468 if (boolforchange) { 1469 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1470 } 1471 total_counts++; 1472 } 1473 } 1474 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1475 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1476 if (!pcbddc->use_nnsp_true) { 1477 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1478 1479 #if defined(PETSC_MISSING_LAPACK_GESVD) 1480 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1481 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1482 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1483 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1484 from that computed using LAPACKgesvd 1485 -> This is due to a different computation of eigenvectors in LAPACKheev 1486 -> The quality of the POD-computed basis will be the same */ 1487 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1488 /* Store upper triangular part of correlation matrix */ 1489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1491 for (j=0;j<temp_constraints;j++) { 1492 for (k=0;k<j+1;k++) { 1493 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1494 } 1495 } 1496 /* compute eigenvalues and eigenvectors of correlation matrix */ 1497 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1498 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1499 #if !defined(PETSC_USE_COMPLEX) 1500 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1501 #else 1502 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1503 #endif 1504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1505 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1506 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1507 j=0; 1508 while (j < temp_constraints && singular_vals[j] < tol) j++; 1509 total_counts=total_counts-j; 1510 /* scale and copy POD basis into used quadrature memory */ 1511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1515 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1517 if (j<temp_constraints) { 1518 PetscInt ii; 1519 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1520 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1521 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1523 for (k=0;k<temp_constraints-j;k++) { 1524 for (ii=0;ii<size_of_constraint;ii++) { 1525 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1526 } 1527 } 1528 } 1529 #else /* on missing GESVD */ 1530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1533 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1534 #if !defined(PETSC_USE_COMPLEX) 1535 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1536 #else 1537 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1538 #endif 1539 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1541 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1542 k = temp_constraints; 1543 if (k > size_of_constraint) k = size_of_constraint; 1544 j = 0; 1545 while (j < k && singular_vals[k-j-1] < tol) j++; 1546 total_counts = total_counts-temp_constraints+k-j; 1547 #endif /* on missing GESVD */ 1548 } 1549 } 1550 /* free index sets of faces, edges and vertices */ 1551 for (i=0;i<n_ISForFaces;i++) { 1552 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1553 } 1554 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1555 for (i=0;i<n_ISForEdges;i++) { 1556 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1559 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1560 1561 /* free workspace */ 1562 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1563 ierr = PetscFree(work);CHKERRQ(ierr); 1564 #if defined(PETSC_USE_COMPLEX) 1565 ierr = PetscFree(rwork);CHKERRQ(ierr); 1566 #endif 1567 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1568 #if defined(PETSC_MISSING_LAPACK_GESVD) 1569 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1570 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1571 #endif 1572 } 1573 for (k=0;k<nnsp_size;k++) { 1574 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1575 } 1576 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1577 1578 /* set quantities in pcbddc data structure */ 1579 /* n_vertices defines the number of subdomain corners in the primal space */ 1580 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1581 pcbddc->local_primal_size = total_counts; 1582 pcbddc->n_vertices = n_vertices; 1583 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1584 1585 /* Create constraint matrix */ 1586 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1587 /* so we need to set it up properly either with or without change of basis */ 1588 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1589 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1590 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1591 /* array to compute a local numbering of constraints : vertices first then constraints */ 1592 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1593 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1594 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1595 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1596 /* auxiliary stuff for basis change */ 1597 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1598 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1599 1600 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1601 total_primal_vertices=0; 1602 for (i=0;i<pcbddc->local_primal_size;i++) { 1603 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1604 if (size_of_constraint == 1) { 1605 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1606 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1607 aux_primal_minloc[total_primal_vertices]=0; 1608 total_primal_vertices++; 1609 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1610 PetscInt min_loc,min_index; 1611 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1612 /* find first untouched local node */ 1613 k = 0; 1614 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1615 min_index = global_indices[k]; 1616 min_loc = k; 1617 /* search the minimum among global nodes already untouched on the cc */ 1618 for (k=1;k<size_of_constraint;k++) { 1619 /* there can be more than one constraint on a single connected component */ 1620 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1621 min_index = global_indices[k]; 1622 min_loc = k; 1623 } 1624 } 1625 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1626 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1627 aux_primal_minloc[total_primal_vertices]=min_loc; 1628 total_primal_vertices++; 1629 } 1630 } 1631 /* free workspace */ 1632 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1633 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1634 /* permute indices in order to have a sorted set of vertices */ 1635 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1636 1637 /* nonzero structure of constraint matrix */ 1638 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1639 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1640 j=total_primal_vertices; 1641 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1642 if (!PetscBTLookup(change_basis,i)) { 1643 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1644 j++; 1645 } 1646 } 1647 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1648 ierr = PetscFree(nnz);CHKERRQ(ierr); 1649 /* set values in constraint matrix */ 1650 for (i=0;i<total_primal_vertices;i++) { 1651 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 total_counts = total_primal_vertices; 1654 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1655 if (!PetscBTLookup(change_basis,i)) { 1656 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1657 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1658 total_counts++; 1659 } 1660 } 1661 /* assembling */ 1662 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 /* 1665 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1666 */ 1667 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1668 if (pcbddc->use_change_of_basis) { 1669 PetscBool qr_needed = PETSC_FALSE; 1670 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1671 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1672 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1673 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1674 /* work arrays */ 1675 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1676 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1677 /* nonzeros per row */ 1678 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1679 if (PetscBTLookup(change_basis,i)) { 1680 qr_needed = PETSC_TRUE; 1681 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1682 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1683 } 1684 } 1685 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1686 ierr = PetscFree(nnz);CHKERRQ(ierr); 1687 /* Set initial identity in the matrix */ 1688 for (i=0;i<pcis->n_B;i++) { 1689 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1690 } 1691 1692 /* Now we loop on the constraints which need a change of basis */ 1693 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1694 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1695 /* Change of basis matrix T computed via QR decomposition of constraints */ 1696 if (qr_needed) { 1697 /* dual and primal dofs on a single cc */ 1698 PetscInt dual_dofs,primal_dofs; 1699 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1700 PetscInt primal_counter; 1701 /* working stuff for GEQRF */ 1702 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1703 PetscBLASInt lqr_work; 1704 /* working stuff for UNGQR */ 1705 PetscScalar *gqr_work,lgqr_work_t; 1706 PetscBLASInt lgqr_work; 1707 /* working stuff for TRTRS */ 1708 PetscScalar *trs_rhs; 1709 PetscBLASInt Blas_NRHS; 1710 /* pointers for values insertion into change of basis matrix */ 1711 PetscInt *start_rows,*start_cols; 1712 PetscScalar *start_vals; 1713 /* working stuff for values insertion */ 1714 PetscBT is_primal; 1715 1716 /* space to store Q */ 1717 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1718 /* first we issue queries for optimal work */ 1719 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1720 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1721 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1722 lqr_work = -1; 1723 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1724 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1725 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1726 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1727 lgqr_work = -1; 1728 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1731 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1732 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1733 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1734 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1735 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1736 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1737 /* array to store scaling factors for reflectors */ 1738 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1739 /* array to store rhs and solution of triangular solver */ 1740 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1741 /* array to store whether a node is primal or not */ 1742 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1743 for (i=0;i<total_primal_vertices;i++) { 1744 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1745 } 1746 1747 /* allocating workspace for check */ 1748 if (pcbddc->dbg_flag) { 1749 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1750 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1751 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1752 } 1753 1754 /* loop on constraints and see whether or not they need a change of basis */ 1755 /* -> using implicit ordering contained in temp_indices data */ 1756 total_counts = pcbddc->n_vertices; 1757 primal_counter = total_counts; 1758 while (total_counts<pcbddc->local_primal_size) { 1759 primal_dofs = 1; 1760 if (PetscBTLookup(change_basis,total_counts)) { 1761 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1762 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1763 primal_dofs++; 1764 } 1765 /* get constraint info */ 1766 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1767 dual_dofs = size_of_constraint-primal_dofs; 1768 1769 /* copy quadrature constraints for change of basis check */ 1770 if (pcbddc->dbg_flag) { 1771 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1772 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1773 } 1774 1775 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1776 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1777 1778 /* compute QR decomposition of constraints */ 1779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1780 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1782 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1783 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1786 1787 /* explictly compute R^-T */ 1788 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1789 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1790 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1791 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1792 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1793 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1794 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1795 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1796 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1797 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1798 1799 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1802 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1803 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1805 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1806 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1807 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1808 1809 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1810 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1811 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1813 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1814 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1816 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1818 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1819 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1820 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1821 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1822 1823 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1824 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1825 /* insert cols for primal dofs */ 1826 for (j=0;j<primal_dofs;j++) { 1827 start_vals = &qr_basis[j*size_of_constraint]; 1828 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1829 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1830 } 1831 /* insert cols for dual dofs */ 1832 for (j=0,k=0;j<dual_dofs;k++) { 1833 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1834 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1835 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1836 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1837 j++; 1838 } 1839 } 1840 1841 /* check change of basis */ 1842 if (pcbddc->dbg_flag) { 1843 PetscInt ii,jj; 1844 PetscBool valid_qr=PETSC_TRUE; 1845 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1850 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1851 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1852 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1853 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1854 for (jj=0;jj<size_of_constraint;jj++) { 1855 for (ii=0;ii<primal_dofs;ii++) { 1856 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1857 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1858 } 1859 } 1860 if (!valid_qr) { 1861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1862 for (jj=0;jj<size_of_constraint;jj++) { 1863 for (ii=0;ii<primal_dofs;ii++) { 1864 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1865 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1866 } 1867 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1868 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1869 } 1870 } 1871 } 1872 } else { 1873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1874 } 1875 } 1876 /* increment primal counter */ 1877 primal_counter += primal_dofs; 1878 } else { 1879 if (pcbddc->dbg_flag) { 1880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1881 } 1882 } 1883 /* increment constraint counter total_counts */ 1884 total_counts += primal_dofs; 1885 } 1886 if (pcbddc->dbg_flag) { 1887 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1888 ierr = PetscFree(work);CHKERRQ(ierr); 1889 } 1890 /* free workspace */ 1891 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1892 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1893 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1894 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1895 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1896 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1897 } 1898 /* assembling */ 1899 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1900 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 /* 1902 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1903 */ 1904 } 1905 /* free workspace */ 1906 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1907 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1908 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1909 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1910 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1911 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1912 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1913 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1919 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1920 { 1921 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1922 PC_IS *pcis = (PC_IS*)pc->data; 1923 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1924 PetscInt bs,ierr,i,vertex_size; 1925 PetscViewer viewer=pcbddc->dbg_viewer; 1926 1927 PetscFunctionBegin; 1928 /* Init local Graph struct */ 1929 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1930 1931 /* Check validity of the csr graph passed in by the user */ 1932 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1933 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1934 } 1935 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1936 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1937 Mat mat_adj; 1938 const PetscInt *xadj,*adjncy; 1939 PetscBool flg_row=PETSC_TRUE; 1940 1941 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1942 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1943 if (!flg_row) { 1944 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1945 } 1946 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1947 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1948 if (!flg_row) { 1949 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1950 } 1951 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1952 } 1953 1954 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1955 vertex_size = 1; 1956 if (!pcbddc->n_ISForDofs) { 1957 IS *custom_ISForDofs; 1958 1959 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1960 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1961 for (i=0;i<bs;i++) { 1962 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1963 } 1964 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1965 /* remove my references to IS objects */ 1966 for (i=0;i<bs;i++) { 1967 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1968 } 1969 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1970 } else { /* mat block size as vertex size (used for elasticity) */ 1971 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1972 } 1973 1974 /* Setup of Graph */ 1975 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1976 1977 /* Graph's connected components analysis */ 1978 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1979 1980 /* print some info to stdout */ 1981 if (pcbddc->dbg_flag) { 1982 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1989 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1990 { 1991 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1992 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 n = 0; 1997 vertices = 0; 1998 if (pcbddc->ConstraintMatrix) { 1999 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2000 for (i=0;i<local_primal_size;i++) { 2001 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2002 if (size_of_constraint == 1) n++; 2003 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2004 } 2005 if (vertices_idx) { 2006 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2007 n = 0; 2008 for (i=0;i<local_primal_size;i++) { 2009 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2010 if (size_of_constraint == 1) { 2011 vertices[n++]=row_cmat_indices[0]; 2012 } 2013 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2014 } 2015 } 2016 } 2017 *n_vertices = n; 2018 if (vertices_idx) *vertices_idx = vertices; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2024 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 2025 { 2026 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2027 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2028 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2029 PetscBT touched; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 n = 0; 2034 constraints_index = 0; 2035 if (pcbddc->ConstraintMatrix) { 2036 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2037 max_size_of_constraint = 0; 2038 for (i=0;i<local_primal_size;i++) { 2039 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2040 if (size_of_constraint > 1) { 2041 n++; 2042 } 2043 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2044 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2045 } 2046 if (constraints_idx) { 2047 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2048 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2049 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2050 n = 0; 2051 for (i=0;i<local_primal_size;i++) { 2052 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2053 if (size_of_constraint > 1) { 2054 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2055 /* find first untouched local node */ 2056 j = 0; 2057 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2058 min_index = row_cmat_global_indices[j]; 2059 min_loc = j; 2060 /* search the minimum among nodes not yet touched on the connected component 2061 since there can be more than one constraint on a single cc */ 2062 for (j=1;j<size_of_constraint;j++) { 2063 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2064 min_index = row_cmat_global_indices[j]; 2065 min_loc = j; 2066 } 2067 } 2068 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2069 constraints_index[n++] = row_cmat_indices[min_loc]; 2070 } 2071 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2072 } 2073 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2074 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2075 } 2076 } 2077 *n_constraints = n; 2078 if (constraints_idx) *constraints_idx = constraints_index; 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* the next two functions has been adapted from pcis.c */ 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "PCBDDCApplySchur" 2085 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2086 { 2087 PetscErrorCode ierr; 2088 PC_IS *pcis = (PC_IS*)(pc->data); 2089 2090 PetscFunctionBegin; 2091 if (!vec2_B) { vec2_B = v; } 2092 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2093 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2094 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2095 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2096 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2102 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2103 { 2104 PetscErrorCode ierr; 2105 PC_IS *pcis = (PC_IS*)(pc->data); 2106 2107 PetscFunctionBegin; 2108 if (!vec2_B) { vec2_B = v; } 2109 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2110 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2111 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2112 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2113 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 #undef __FUNCT__ 2118 #define __FUNCT__ "PCBDDCSubsetNumbering" 2119 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2120 { 2121 Vec local_vec,global_vec; 2122 IS seqis,paris; 2123 VecScatter scatter_ctx; 2124 PetscScalar *array; 2125 PetscInt *temp_global_dofs; 2126 PetscScalar globalsum; 2127 PetscInt i,j,s; 2128 PetscInt nlocals,first_index,old_index,max_local; 2129 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2130 PetscMPIInt *dof_sizes,*dof_displs; 2131 PetscBool first_found; 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 /* mpi buffers */ 2136 MPI_Comm_size(comm,&size_prec_comm); 2137 MPI_Comm_rank(comm,&rank_prec_comm); 2138 j = ( !rank_prec_comm ? size_prec_comm : 0); 2139 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2140 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2141 /* get maximum size of subset */ 2142 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2143 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2144 max_local = 0; 2145 if (n_local_dofs) { 2146 max_local = temp_global_dofs[0]; 2147 for (i=1;i<n_local_dofs;i++) { 2148 if (max_local < temp_global_dofs[i] ) { 2149 max_local = temp_global_dofs[i]; 2150 } 2151 } 2152 } 2153 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2154 max_global++; 2155 max_local = 0; 2156 if (n_local_dofs) { 2157 max_local = local_dofs[0]; 2158 for (i=1;i<n_local_dofs;i++) { 2159 if (max_local < local_dofs[i] ) { 2160 max_local = local_dofs[i]; 2161 } 2162 } 2163 } 2164 max_local++; 2165 /* allocate workspace */ 2166 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2167 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2168 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2169 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2170 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2171 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2172 /* create scatter */ 2173 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2174 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2175 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2176 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2177 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2178 /* init array */ 2179 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2180 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2181 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2182 if (local_dofs_mult) { 2183 for (i=0;i<n_local_dofs;i++) { 2184 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2185 } 2186 } else { 2187 for (i=0;i<n_local_dofs;i++) { 2188 array[local_dofs[i]]=1.0; 2189 } 2190 } 2191 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2192 /* scatter into global vec and get total number of global dofs */ 2193 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2196 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2197 /* Fill global_vec with cumulative function for global numbering */ 2198 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2199 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2200 nlocals = 0; 2201 first_index = -1; 2202 first_found = PETSC_FALSE; 2203 for (i=0;i<s;i++) { 2204 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2205 first_found = PETSC_TRUE; 2206 first_index = i; 2207 } 2208 nlocals += (PetscInt)PetscRealPart(array[i]); 2209 } 2210 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2211 if (!rank_prec_comm) { 2212 dof_displs[0]=0; 2213 for (i=1;i<size_prec_comm;i++) { 2214 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2215 } 2216 } 2217 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2218 if (first_found) { 2219 array[first_index] += (PetscScalar)nlocals; 2220 old_index = first_index; 2221 for (i=first_index+1;i<s;i++) { 2222 if (PetscRealPart(array[i]) > 0.0) { 2223 array[i] += array[old_index]; 2224 old_index = i; 2225 } 2226 } 2227 } 2228 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2229 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2230 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2231 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2232 /* get global ordering of local dofs */ 2233 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2234 if (local_dofs_mult) { 2235 for (i=0;i<n_local_dofs;i++) { 2236 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2237 } 2238 } else { 2239 for (i=0;i<n_local_dofs;i++) { 2240 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2241 } 2242 } 2243 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2244 /* free workspace */ 2245 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2246 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2247 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2248 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2249 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2250 /* return pointer to global ordering of local dofs */ 2251 *global_numbering_subset = temp_global_dofs; 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2257 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2258 { 2259 PetscInt i,j; 2260 PetscScalar *alphas; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 /* this implements stabilized Gram-Schmidt */ 2265 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2266 for (i=0;i<n;i++) { 2267 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2268 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2269 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2270 } 2271 ierr = PetscFree(alphas);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 /* BDDC requires metis 5.0.1 for multilevel */ 2276 #if defined(PETSC_HAVE_METIS) 2277 #include "metis.h" 2278 #define MetisInt idx_t 2279 #define MetisScalar real_t 2280 #endif 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2284 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2285 { 2286 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2287 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2288 PC_IS *pcis = (PC_IS*)pc->data; 2289 MPI_Comm prec_comm; 2290 MPI_Comm coarse_comm; 2291 2292 PetscInt coarse_size; 2293 2294 MatNullSpace CoarseNullSpace; 2295 2296 /* common to all choiches */ 2297 PetscScalar *temp_coarse_mat_vals; 2298 PetscScalar *ins_coarse_mat_vals; 2299 PetscInt *ins_local_primal_indices; 2300 PetscMPIInt *localsizes2,*localdispl2; 2301 PetscMPIInt size_prec_comm; 2302 PetscMPIInt rank_prec_comm; 2303 PetscMPIInt active_rank=MPI_PROC_NULL; 2304 PetscMPIInt master_proc=0; 2305 PetscInt ins_local_primal_size; 2306 /* specific to MULTILEVEL_BDDC */ 2307 PetscMPIInt *ranks_recv=0; 2308 PetscMPIInt count_recv=0; 2309 PetscMPIInt rank_coarse_proc_send_to=-1; 2310 PetscMPIInt coarse_color = MPI_UNDEFINED; 2311 ISLocalToGlobalMapping coarse_ISLG; 2312 /* some other variables */ 2313 PetscErrorCode ierr; 2314 MatType coarse_mat_type; 2315 PCType coarse_pc_type; 2316 KSPType coarse_ksp_type; 2317 PC pc_temp; 2318 PetscInt i,j,k; 2319 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2320 /* verbose output viewer */ 2321 PetscViewer viewer=pcbddc->dbg_viewer; 2322 PetscInt dbg_flag=pcbddc->dbg_flag; 2323 2324 PetscInt offset,offset2; 2325 PetscMPIInt im_active,active_procs; 2326 PetscInt *dnz,*onz; 2327 2328 PetscBool setsym,issym=PETSC_FALSE; 2329 2330 PetscFunctionBegin; 2331 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2332 ins_local_primal_indices = 0; 2333 ins_coarse_mat_vals = 0; 2334 localsizes2 = 0; 2335 localdispl2 = 0; 2336 temp_coarse_mat_vals = 0; 2337 coarse_ISLG = 0; 2338 2339 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2340 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2341 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2342 2343 /* Assign global numbering to coarse dofs */ 2344 { 2345 PetscInt *auxlocal_primal,*aux_idx; 2346 PetscMPIInt mpi_local_primal_size; 2347 PetscScalar coarsesum,*array; 2348 2349 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2350 2351 /* Construct needed data structures for message passing */ 2352 j = 0; 2353 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2354 j = size_prec_comm; 2355 } 2356 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2357 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2358 /* Gather local_primal_size information for all processes */ 2359 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2360 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2361 } else { 2362 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2363 } 2364 pcbddc->replicated_primal_size = 0; 2365 for (i=0; i<j; i++) { 2366 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2367 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2368 } 2369 2370 /* This code fragment assumes that the number of local constraints per connected component 2371 is not greater than the number of nodes defined for the connected component 2372 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2373 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2374 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2375 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2376 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2377 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2378 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2379 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2380 /* Compute number of coarse dofs */ 2381 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2382 2383 if (dbg_flag) { 2384 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2385 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2386 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2387 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2388 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2389 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2390 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2391 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2392 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2393 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2394 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2395 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2396 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2397 for (i=0;i<pcis->n;i++) { 2398 if (array[i] == 1.0) { 2399 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2400 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2401 } 2402 } 2403 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2404 for (i=0;i<pcis->n;i++) { 2405 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2406 } 2407 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2408 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2409 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2410 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2411 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2412 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2413 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2414 } 2415 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2416 } 2417 2418 if (dbg_flag) { 2419 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2420 if (dbg_flag > 1) { 2421 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2423 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2424 for (i=0;i<pcbddc->local_primal_size;i++) { 2425 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2426 } 2427 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2428 } 2429 } 2430 2431 im_active = 0; 2432 if (pcis->n) im_active = 1; 2433 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2434 2435 /* adapt coarse problem type */ 2436 #if defined(PETSC_HAVE_METIS) 2437 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2438 if (pcbddc->current_level < pcbddc->max_levels) { 2439 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2440 if (dbg_flag) { 2441 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2442 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2443 } 2444 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2445 } 2446 } else { 2447 if (dbg_flag) { 2448 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2449 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2450 } 2451 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2452 } 2453 } 2454 #else 2455 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2456 #endif 2457 2458 switch(pcbddc->coarse_problem_type){ 2459 2460 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2461 { 2462 #if defined(PETSC_HAVE_METIS) 2463 /* we need additional variables */ 2464 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2465 MetisInt *metis_coarse_subdivision; 2466 MetisInt options[METIS_NOPTIONS]; 2467 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2468 PetscMPIInt procs_jumps_coarse_comm; 2469 PetscMPIInt *coarse_subdivision; 2470 PetscMPIInt *total_count_recv; 2471 PetscMPIInt *total_ranks_recv; 2472 PetscMPIInt *displacements_recv; 2473 PetscMPIInt *my_faces_connectivity; 2474 PetscMPIInt *petsc_faces_adjncy; 2475 MetisInt *faces_adjncy; 2476 MetisInt *faces_xadj; 2477 PetscMPIInt *number_of_faces; 2478 PetscMPIInt *faces_displacements; 2479 PetscInt *array_int; 2480 PetscMPIInt my_faces=0; 2481 PetscMPIInt total_faces=0; 2482 PetscInt ranks_stretching_ratio; 2483 2484 /* define some quantities */ 2485 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2486 coarse_mat_type = MATIS; 2487 coarse_pc_type = PCBDDC; 2488 coarse_ksp_type = KSPRICHARDSON; 2489 2490 /* details of coarse decomposition */ 2491 n_subdomains = active_procs; 2492 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2493 ranks_stretching_ratio = size_prec_comm/active_procs; 2494 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2495 2496 #if 0 2497 PetscMPIInt *old_ranks; 2498 PetscInt *new_ranks,*jj,*ii; 2499 MatPartitioning mat_part; 2500 IS coarse_new_decomposition,is_numbering; 2501 PetscViewer viewer_test; 2502 MPI_Comm test_coarse_comm; 2503 PetscMPIInt test_coarse_color; 2504 Mat mat_adj; 2505 /* Create new communicator for coarse problem splitting the old one */ 2506 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2507 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2508 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2509 test_coarse_comm = MPI_COMM_NULL; 2510 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2511 if (im_active) { 2512 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2513 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2514 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2515 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2516 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2517 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2518 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2519 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2520 k = pcis->n_neigh-1; 2521 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2522 ii[0]=0; 2523 ii[1]=k; 2524 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2525 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2526 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2527 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2528 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2529 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2530 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2531 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2532 printf("Setting Nparts %d\n",n_parts); 2533 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2534 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2535 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2536 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2537 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2538 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2539 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2540 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2541 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2542 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2543 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2544 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2545 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2546 } 2547 #endif 2548 2549 /* build CSR graph of subdomains' connectivity */ 2550 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2551 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2552 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2553 for (j=0;j<pcis->n_shared[i];j++){ 2554 array_int[ pcis->shared[i][j] ]+=1; 2555 } 2556 } 2557 for (i=1;i<pcis->n_neigh;i++){ 2558 for (j=0;j<pcis->n_shared[i];j++){ 2559 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2560 my_faces++; 2561 break; 2562 } 2563 } 2564 } 2565 2566 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2567 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2568 my_faces=0; 2569 for (i=1;i<pcis->n_neigh;i++){ 2570 for (j=0;j<pcis->n_shared[i];j++){ 2571 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2572 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2573 my_faces++; 2574 break; 2575 } 2576 } 2577 } 2578 if (rank_prec_comm == master_proc) { 2579 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2580 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2581 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2582 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2583 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2584 } 2585 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2586 if (rank_prec_comm == master_proc) { 2587 faces_xadj[0]=0; 2588 faces_displacements[0]=0; 2589 j=0; 2590 for (i=1;i<size_prec_comm+1;i++) { 2591 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2592 if (number_of_faces[i-1]) { 2593 j++; 2594 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2595 } 2596 } 2597 } 2598 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2599 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2600 ierr = PetscFree(array_int);CHKERRQ(ierr); 2601 if (rank_prec_comm == master_proc) { 2602 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2603 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2604 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2605 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2606 } 2607 2608 if ( rank_prec_comm == master_proc ) { 2609 2610 PetscInt heuristic_for_metis=3; 2611 2612 ncon=1; 2613 faces_nvtxs=n_subdomains; 2614 /* partition graoh induced by face connectivity */ 2615 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2616 ierr = METIS_SetDefaultOptions(options); 2617 /* we need a contiguous partition of the coarse mesh */ 2618 options[METIS_OPTION_CONTIG]=1; 2619 options[METIS_OPTION_NITER]=30; 2620 if (pcbddc->coarsening_ratio > 1) { 2621 if (n_subdomains>n_parts*heuristic_for_metis) { 2622 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2623 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2624 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2625 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2626 } else { 2627 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2628 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2629 } 2630 } else { 2631 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2632 } 2633 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2634 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2635 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2636 2637 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2638 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2639 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2640 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2641 } 2642 2643 /* Create new communicator for coarse problem splitting the old one */ 2644 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2645 coarse_color=0; /* for communicator splitting */ 2646 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2647 } 2648 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2649 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2650 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2651 2652 if ( coarse_color == 0 ) { 2653 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2654 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2655 } else { 2656 rank_coarse_comm = MPI_PROC_NULL; 2657 } 2658 2659 /* master proc take care of arranging and distributing coarse information */ 2660 if (rank_coarse_comm == master_proc) { 2661 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2662 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2663 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2664 /* some initializations */ 2665 displacements_recv[0]=0; 2666 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2667 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2668 for (j=0;j<size_coarse_comm;j++) { 2669 for (i=0;i<size_prec_comm;i++) { 2670 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2671 } 2672 } 2673 /* displacements needed for scatterv of total_ranks_recv */ 2674 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2675 2676 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2677 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2678 for (j=0;j<size_coarse_comm;j++) { 2679 for (i=0;i<size_prec_comm;i++) { 2680 if (coarse_subdivision[i]==j) { 2681 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2682 total_count_recv[j]+=1; 2683 } 2684 } 2685 } 2686 /*for (j=0;j<size_coarse_comm;j++) { 2687 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2688 for (i=0;i<total_count_recv[j];i++) { 2689 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2690 } 2691 printf("\n"); 2692 }*/ 2693 2694 /* identify new decomposition in terms of ranks in the old communicator */ 2695 for (i=0;i<n_subdomains;i++) { 2696 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2697 } 2698 /*printf("coarse_subdivision in old end new ranks\n"); 2699 for (i=0;i<size_prec_comm;i++) 2700 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2701 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2702 } else { 2703 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2704 } 2705 printf("\n");*/ 2706 } 2707 2708 /* Scatter new decomposition for send details */ 2709 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2710 /* Scatter receiving details to members of coarse decomposition */ 2711 if ( coarse_color == 0) { 2712 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2713 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2714 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2715 } 2716 2717 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2718 if (coarse_color == 0) { 2719 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2720 for (i=0;i<count_recv;i++) 2721 printf("%d ",ranks_recv[i]); 2722 printf("\n"); 2723 }*/ 2724 2725 if (rank_prec_comm == master_proc) { 2726 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2727 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2728 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2729 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2730 } 2731 #endif 2732 break; 2733 } 2734 2735 case(REPLICATED_BDDC): 2736 2737 pcbddc->coarse_communications_type = GATHERS_BDDC; 2738 coarse_mat_type = MATSEQAIJ; 2739 coarse_pc_type = PCLU; 2740 coarse_ksp_type = KSPPREONLY; 2741 coarse_comm = PETSC_COMM_SELF; 2742 active_rank = rank_prec_comm; 2743 break; 2744 2745 case(PARALLEL_BDDC): 2746 2747 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2748 coarse_mat_type = MATAIJ; 2749 coarse_pc_type = PCREDUNDANT; 2750 coarse_ksp_type = KSPPREONLY; 2751 coarse_comm = prec_comm; 2752 active_rank = rank_prec_comm; 2753 break; 2754 2755 case(SEQUENTIAL_BDDC): 2756 pcbddc->coarse_communications_type = GATHERS_BDDC; 2757 coarse_mat_type = MATAIJ; 2758 coarse_pc_type = PCLU; 2759 coarse_ksp_type = KSPPREONLY; 2760 coarse_comm = PETSC_COMM_SELF; 2761 active_rank = master_proc; 2762 break; 2763 } 2764 2765 switch(pcbddc->coarse_communications_type){ 2766 2767 case(SCATTERS_BDDC): 2768 { 2769 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2770 2771 IS coarse_IS; 2772 2773 if(pcbddc->coarsening_ratio == 1) { 2774 ins_local_primal_size = pcbddc->local_primal_size; 2775 ins_local_primal_indices = pcbddc->local_primal_indices; 2776 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2777 /* nonzeros */ 2778 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2779 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2780 for (i=0;i<ins_local_primal_size;i++) { 2781 dnz[i] = ins_local_primal_size; 2782 } 2783 } else { 2784 PetscMPIInt send_size; 2785 PetscMPIInt *send_buffer; 2786 PetscInt *aux_ins_indices; 2787 PetscInt ii,jj; 2788 MPI_Request *requests; 2789 2790 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2791 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2792 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2793 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2794 pcbddc->replicated_primal_size = count_recv; 2795 j = 0; 2796 for (i=0;i<count_recv;i++) { 2797 pcbddc->local_primal_displacements[i] = j; 2798 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2799 } 2800 pcbddc->local_primal_displacements[count_recv] = j; 2801 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2802 /* allocate auxiliary space */ 2803 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2804 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2805 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2806 /* allocate stuffs for message massing */ 2807 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2808 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2809 /* send indices to be inserted */ 2810 for (i=0;i<count_recv;i++) { 2811 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2812 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2813 } 2814 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2815 send_size = pcbddc->local_primal_size; 2816 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2817 for (i=0;i<send_size;i++) { 2818 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2819 } 2820 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2821 } 2822 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2823 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2824 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2825 } 2826 j = 0; 2827 for (i=0;i<count_recv;i++) { 2828 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2829 localsizes2[i] = ii*ii; 2830 localdispl2[i] = j; 2831 j += localsizes2[i]; 2832 jj = pcbddc->local_primal_displacements[i]; 2833 /* it counts the coarse subdomains sharing the coarse node */ 2834 for (k=0;k<ii;k++) { 2835 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2836 } 2837 } 2838 /* temp_coarse_mat_vals used to store matrix values to be received */ 2839 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2840 /* evaluate how many values I will insert in coarse mat */ 2841 ins_local_primal_size = 0; 2842 for (i=0;i<coarse_size;i++) { 2843 if (aux_ins_indices[i]) { 2844 ins_local_primal_size++; 2845 } 2846 } 2847 /* evaluate indices I will insert in coarse mat */ 2848 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2849 j = 0; 2850 for(i=0;i<coarse_size;i++) { 2851 if(aux_ins_indices[i]) { 2852 ins_local_primal_indices[j] = i; 2853 j++; 2854 } 2855 } 2856 /* processes partecipating in coarse problem receive matrix data from their friends */ 2857 for (i=0;i<count_recv;i++) { 2858 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2859 } 2860 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2861 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2862 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2863 } 2864 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2865 /* nonzeros */ 2866 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2867 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2868 /* use aux_ins_indices to realize a global to local mapping */ 2869 j=0; 2870 for(i=0;i<coarse_size;i++){ 2871 if(aux_ins_indices[i]==0){ 2872 aux_ins_indices[i]=-1; 2873 } else { 2874 aux_ins_indices[i]=j; 2875 j++; 2876 } 2877 } 2878 for (i=0;i<count_recv;i++) { 2879 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2880 for (k=0;k<j;k++) { 2881 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2882 } 2883 } 2884 /* check */ 2885 for (i=0;i<ins_local_primal_size;i++) { 2886 if (dnz[i] > ins_local_primal_size) { 2887 dnz[i] = ins_local_primal_size; 2888 } 2889 } 2890 ierr = PetscFree(requests);CHKERRQ(ierr); 2891 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2892 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2893 } 2894 /* create local to global mapping needed by coarse MATIS */ 2895 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2896 coarse_comm = prec_comm; 2897 active_rank = rank_prec_comm; 2898 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2900 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2901 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2902 /* arrays for values insertion */ 2903 ins_local_primal_size = pcbddc->local_primal_size; 2904 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2905 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2906 for (j=0;j<ins_local_primal_size;j++){ 2907 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2908 for (i=0;i<ins_local_primal_size;i++) { 2909 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2910 } 2911 } 2912 } 2913 break; 2914 2915 } 2916 2917 case(GATHERS_BDDC): 2918 { 2919 2920 PetscMPIInt mysize,mysize2; 2921 PetscMPIInt *send_buffer; 2922 2923 if (rank_prec_comm==active_rank) { 2924 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2925 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 2926 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2927 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2928 /* arrays for values insertion */ 2929 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2930 localdispl2[0]=0; 2931 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2932 j=0; 2933 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2934 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2935 } 2936 2937 mysize=pcbddc->local_primal_size; 2938 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2939 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2940 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2941 2942 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2943 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2944 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2945 } else { 2946 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2947 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2950 break; 2951 }/* switch on coarse problem and communications associated with finished */ 2952 } 2953 2954 /* Now create and fill up coarse matrix */ 2955 if ( rank_prec_comm == active_rank ) { 2956 2957 Mat matis_coarse_local_mat; 2958 2959 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2960 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2961 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2962 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2963 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2964 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2965 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2966 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2967 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2968 } else { 2969 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2970 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2971 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2972 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2973 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2974 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2975 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2976 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2977 } 2978 /* preallocation */ 2979 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2980 2981 PetscInt lrows,lcols,bs; 2982 2983 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2984 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2985 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 2986 2987 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2988 2989 Vec vec_dnz,vec_onz; 2990 PetscScalar *my_dnz,*my_onz,*array; 2991 PetscInt *mat_ranges,*row_ownership; 2992 PetscInt coarse_index_row,coarse_index_col,owner; 2993 2994 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 2995 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2996 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 2997 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 2998 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2999 3000 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 3001 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 3002 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3003 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3004 3005 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 3006 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 3007 for (i=0;i<size_prec_comm;i++) { 3008 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 3009 row_ownership[j]=i; 3010 } 3011 } 3012 3013 for (i=0;i<pcbddc->local_primal_size;i++) { 3014 coarse_index_row = pcbddc->local_primal_indices[i]; 3015 owner = row_ownership[coarse_index_row]; 3016 for (j=i;j<pcbddc->local_primal_size;j++) { 3017 owner = row_ownership[coarse_index_row]; 3018 coarse_index_col = pcbddc->local_primal_indices[j]; 3019 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3020 my_dnz[i] += 1.0; 3021 } else { 3022 my_onz[i] += 1.0; 3023 } 3024 if (i != j) { 3025 owner = row_ownership[coarse_index_col]; 3026 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3027 my_dnz[j] += 1.0; 3028 } else { 3029 my_onz[j] += 1.0; 3030 } 3031 } 3032 } 3033 } 3034 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3035 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3036 if (pcbddc->local_primal_size) { 3037 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3038 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3039 } 3040 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3041 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3042 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3043 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3044 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3045 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3046 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3047 3048 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3049 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3050 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3051 3052 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3053 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3054 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3055 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3056 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3057 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3058 } else { 3059 for (k=0;k<size_prec_comm;k++){ 3060 offset=pcbddc->local_primal_displacements[k]; 3061 offset2=localdispl2[k]; 3062 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3063 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3064 for (j=0;j<ins_local_primal_size;j++){ 3065 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3066 } 3067 for (j=0;j<ins_local_primal_size;j++) { 3068 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3069 } 3070 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3071 } 3072 } 3073 3074 /* check */ 3075 for (i=0;i<lrows;i++) { 3076 if (dnz[i]>lcols) dnz[i]=lcols; 3077 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 3078 } 3079 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 3080 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3081 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3082 } else { 3083 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3084 ierr = PetscFree(dnz);CHKERRQ(ierr); 3085 } 3086 /* insert values */ 3087 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3088 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3089 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3090 if (pcbddc->coarsening_ratio == 1) { 3091 ins_coarse_mat_vals = coarse_submat_vals; 3092 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3093 } else { 3094 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3095 for (k=0;k<pcbddc->replicated_primal_size;k++) { 3096 offset = pcbddc->local_primal_displacements[k]; 3097 offset2 = localdispl2[k]; 3098 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 3099 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3100 for (j=0;j<ins_local_primal_size;j++){ 3101 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3102 } 3103 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3104 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3105 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3106 } 3107 } 3108 ins_local_primal_indices = 0; 3109 ins_coarse_mat_vals = 0; 3110 } else { 3111 for (k=0;k<size_prec_comm;k++){ 3112 offset=pcbddc->local_primal_displacements[k]; 3113 offset2=localdispl2[k]; 3114 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3115 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3116 for (j=0;j<ins_local_primal_size;j++){ 3117 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3118 } 3119 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3120 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3121 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3122 } 3123 ins_local_primal_indices = 0; 3124 ins_coarse_mat_vals = 0; 3125 } 3126 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3128 /* symmetry of coarse matrix */ 3129 if (issym) { 3130 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3131 } 3132 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3133 } 3134 3135 /* create loc to glob scatters if needed */ 3136 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3137 IS local_IS,global_IS; 3138 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3139 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3140 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3141 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3142 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3143 } 3144 3145 /* free memory no longer needed */ 3146 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3147 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3148 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3149 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3150 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3151 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3152 3153 /* Compute coarse null space */ 3154 CoarseNullSpace = 0; 3155 if (pcbddc->NullSpace) { 3156 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3157 } 3158 3159 /* KSP for coarse problem */ 3160 if (rank_prec_comm == active_rank) { 3161 PetscBool isbddc=PETSC_FALSE; 3162 3163 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3164 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3165 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3166 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3167 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3168 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3169 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3170 /* Allow user's customization */ 3171 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3172 /* Set Up PC for coarse problem BDDC */ 3173 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3174 i = pcbddc->current_level+1; 3175 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3176 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3177 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3178 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3179 if (CoarseNullSpace) { 3180 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3181 } 3182 if (dbg_flag) { 3183 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3185 } 3186 } else { 3187 if (CoarseNullSpace) { 3188 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3189 } 3190 } 3191 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3192 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3193 3194 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3195 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3196 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3197 if (j == 1) { 3198 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3199 if (isbddc) { 3200 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3201 } 3202 } 3203 } 3204 /* Check coarse problem if requested */ 3205 if ( dbg_flag && rank_prec_comm == active_rank ) { 3206 KSP check_ksp; 3207 PC check_pc; 3208 Vec check_vec; 3209 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3210 KSPType check_ksp_type; 3211 3212 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3213 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3214 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3215 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3216 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3217 if (issym) check_ksp_type = KSPCG; 3218 else check_ksp_type = KSPGMRES; 3219 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3220 } else { 3221 check_ksp_type = KSPPREONLY; 3222 } 3223 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3224 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3225 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3226 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3227 /* create random vec */ 3228 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3229 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3230 if (CoarseNullSpace) { 3231 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3232 } 3233 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3234 /* solve coarse problem */ 3235 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3236 if (CoarseNullSpace) { 3237 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3238 } 3239 /* check coarse problem residual error */ 3240 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3241 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3242 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3243 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3244 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3245 /* get eigenvalue estimation if inexact */ 3246 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3247 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3248 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3249 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3250 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3251 } 3252 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3253 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3254 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3255 } 3256 if (dbg_flag) { 3257 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3258 } 3259 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3260 3261 PetscFunctionReturn(0); 3262 } 3263 3264