1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscErrorCode ierr; 11 12 PetscFunctionBegin; 13 /* Compute matrix after change of basis and extract local submatrices */ 14 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 15 16 /* Allocate needed vectors */ 17 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 18 19 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 20 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 21 22 /* Setup local solvers ksp_D and ksp_R */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* setup local correction and local part of coarse basis */ 31 ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCSetLevel" 37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 38 { 39 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 40 41 PetscFunctionBegin; 42 pcbddc->current_level=level; 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "PCBDDCResetCustomization" 48 PetscErrorCode PCBDDCResetCustomization(PC pc) 49 { 50 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 51 PetscInt i; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 60 for (i=0;i<pcbddc->n_ISForDofs;i++) { 61 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 62 } 63 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCCreateWorkVectors" 118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 128 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 129 local_primal_size = n_constraints+n_vertices; 130 n_R = pcis->n-n_vertices; 131 /* local work vectors */ 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 137 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 138 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 139 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 140 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 141 if (n_constraints) { 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 145 } 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc) 152 { 153 PetscErrorCode ierr; 154 /* pointers to pcis and pcbddc */ 155 PC_IS* pcis = (PC_IS*)pc->data; 156 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 157 /* submatrices of local problem */ 158 Mat A_RV,A_VR,A_VV; 159 /* working matrices */ 160 Mat M1,M2,M3,C_CR; 161 /* working vectors */ 162 Vec vec1_C,vec2_C,vec1_V,vec2_V; 163 /* additional working stuff */ 164 IS is_aux; 165 ISLocalToGlobalMapping BtoNmap; 166 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 167 const PetscScalar *array,*row_cmat_values; 168 const PetscInt *row_cmat_indices,*idx_R_local; 169 PetscInt *vertices,*idx_V_B,*auxindices; 170 PetscInt n_vertices,n_constraints,size_of_constraint; 171 PetscInt i,j,n_R,n_D,n_B; 172 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 173 /* Vector and matrix types */ 174 VecType impVecType; 175 MatType impMatType; 176 /* some shortcuts to scalars */ 177 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 178 /* for debugging purposes */ 179 PetscReal *coarsefunctions_errors,*constraints_errors; 180 181 PetscFunctionBegin; 182 /* get number of vertices and their local indices */ 183 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 184 n_constraints = pcbddc->local_primal_size-n_vertices; 185 /* Set Non-overlapping dimensions */ 186 n_B = pcis->n_B; n_D = pcis->n - n_B; 187 n_R = pcis->n-n_vertices; 188 189 /* Set types for local objects needed by BDDC precondtioner */ 190 impMatType = MATSEQDENSE; 191 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 192 193 /* Allocating some extra storage just to be safe */ 194 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 195 for (i=0;i<pcis->n;i++) auxindices[i]=i; 196 197 /* vertices in boundary numbering */ 198 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 199 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 200 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 202 if (i != n_vertices) { 203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 204 } 205 206 /* some work vectors on vertices and/or constraints */ 207 if (n_vertices) { 208 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 209 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 210 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 211 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 212 } 213 if (n_constraints) { 214 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 215 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 216 } 217 218 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 219 if (n_constraints) { 220 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 221 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 222 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 223 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 224 225 /* Extract constraints on R nodes: C_{CR} */ 226 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 227 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 228 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 229 230 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 231 for (i=0;i<n_constraints;i++) { 232 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 233 /* Get row of constraint matrix in R numbering */ 234 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 235 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 237 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 238 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 239 /* Solve for row of constraint matrix in R numbering */ 240 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 241 /* Set values in local_auxmat2 */ 242 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 243 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 245 } 246 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 249 250 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 251 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 252 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 253 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 254 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 255 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 256 ierr = MatSetUp(M1);CHKERRQ(ierr); 257 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 258 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 259 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 260 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 261 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 262 ierr = MatDestroy(&M2);CHKERRQ(ierr); 263 ierr = MatDestroy(&M3);CHKERRQ(ierr); 264 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 265 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 266 } 267 268 /* Get submatrices from subdomain matrix */ 269 if (n_vertices) { 270 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 271 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 273 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 274 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 275 } 276 277 /* Matrix of coarse basis functions (local) */ 278 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 279 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 280 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 281 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 282 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 283 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 284 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 285 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 286 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 287 } 288 289 if (pcbddc->dbg_flag) { 290 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 291 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 292 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 293 } 294 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 295 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 296 297 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 298 299 /* vertices */ 300 for (i=0;i<n_vertices;i++) { 301 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 302 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 303 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 304 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 305 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 306 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 307 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 308 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 309 if (n_constraints) { 310 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 311 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 313 } 314 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 315 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 316 317 /* Set values in coarse basis function and subdomain part of coarse_mat */ 318 /* coarse basis functions */ 319 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 320 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 322 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 323 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 325 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 326 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 327 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 328 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 329 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 330 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 331 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 332 } 333 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 334 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 335 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 337 if (n_constraints) { 338 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 339 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 341 } 342 343 /* check */ 344 if (pcbddc->dbg_flag) { 345 /* assemble subdomain vector on local nodes */ 346 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 350 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 351 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 352 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 353 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 354 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 355 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 356 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 357 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 358 if (n_constraints) { 359 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 360 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 362 } 363 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 364 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 365 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 366 /* check saddle point solution */ 367 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 368 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 369 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 370 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 371 /* shift by the identity matrix */ 372 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 376 } 377 } 378 379 /* constraints */ 380 for (i=0;i<n_constraints;i++) { 381 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 382 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 385 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 386 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 387 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 if (n_vertices) { 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 } 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 j = i+n_vertices; /* don't touch this! */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 if (n_vertices) { 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 } 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 if (n_vertices) { 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 } 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 449 } 450 } 451 /* call assembling routines for local coarse basis */ 452 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 455 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 } 458 459 /* compute other basis functions for non-symmetric problems */ 460 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 461 if (!setsym || (setsym && !issym)) { 462 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 463 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 464 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 465 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 466 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 467 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 468 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 470 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 471 } 472 for (i=0;i<pcbddc->local_primal_size;i++) { 473 if (n_constraints) { 474 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 475 for (j=0;j<n_constraints;j++) { 476 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 477 } 478 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 479 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 480 } 481 if (i<n_vertices) { 482 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 483 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 485 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 486 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 487 if (n_constraints) { 488 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 489 } 490 } else { 491 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 492 } 493 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 498 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 500 if (i<n_vertices) { 501 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 502 } 503 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 504 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 507 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 509 } 510 511 if (pcbddc->dbg_flag) { 512 /* assemble subdomain vector on nodes */ 513 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 514 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 515 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 517 if (i<n_vertices) { 518 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 519 } 520 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 522 /* assemble subdomain vector of lagrange multipliers */ 523 for (j=0;j<pcbddc->local_primal_size;j++) { 524 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 527 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 528 /* check saddle point solution */ 529 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 530 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 532 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 533 /* shift by the identity matrix */ 534 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 535 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 538 } 539 } 540 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 541 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 542 if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 543 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } 546 } 547 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 548 /* Checking coarse_sub_mat and coarse basis functios */ 549 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 550 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 551 if (pcbddc->dbg_flag) { 552 Mat coarse_sub_mat; 553 Mat AUXMAT,TM1,TM2,TM3,TM4; 554 Mat coarse_phi_D,coarse_phi_B; 555 Mat coarse_psi_D,coarse_psi_B; 556 Mat A_II,A_BB,A_IB,A_BI; 557 MatType checkmattype=MATSEQAIJ; 558 PetscReal real_value; 559 560 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 561 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 562 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 563 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 564 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 565 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 566 if (pcbddc->coarse_psi_B) { 567 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 569 } 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 571 572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 574 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 575 if (pcbddc->coarse_psi_B) { 576 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 577 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 579 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 580 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 582 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 583 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 585 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 586 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 588 } else { 589 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 590 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 } 598 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 599 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 600 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 601 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 602 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 608 for (i=0;i<pcbddc->local_primal_size;i++) { 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 610 } 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 612 for (i=0;i<pcbddc->local_primal_size;i++) { 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 614 } 615 if (pcbddc->coarse_psi_B) { 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 617 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 621 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 623 } 624 } 625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 626 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 627 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 628 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 629 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 630 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 631 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 632 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 633 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 634 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 635 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 636 if (pcbddc->coarse_psi_B) { 637 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 639 } 640 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 641 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 642 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 643 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 644 } 645 /* free memory */ 646 if (n_vertices) { 647 ierr = PetscFree(vertices);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 649 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 652 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 653 } 654 if (n_constraints) { 655 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 656 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 657 ierr = MatDestroy(&M1);CHKERRQ(ierr); 658 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 659 } 660 ierr = PetscFree(auxindices);CHKERRQ(ierr); 661 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 662 ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 663 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 670 { 671 PC_IS* pcis = (PC_IS*)(pc->data); 672 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 673 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 674 /* manage repeated solves */ 675 MatReuse reuse; 676 MatStructure matstruct; 677 PetscErrorCode ierr; 678 679 PetscFunctionBegin; 680 /* get mat flags */ 681 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 682 reuse = MAT_INITIAL_MATRIX; 683 if (pc->setupcalled) { 684 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 685 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 686 if (matstruct == SAME_NONZERO_PATTERN) { 687 reuse = MAT_REUSE_MATRIX; 688 } else { 689 reuse = MAT_INITIAL_MATRIX; 690 } 691 } 692 if (reuse == MAT_INITIAL_MATRIX) { 693 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 695 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 } 699 700 /* transform local matrices if needed */ 701 if (pcbddc->use_change_of_basis) { 702 Mat change_mat_all; 703 PetscScalar *row_cmat_values; 704 PetscInt *row_cmat_indices; 705 PetscInt *nnz,*is_indices,*temp_indices; 706 PetscInt i,j,k,n_D,n_B; 707 708 /* Get Non-overlapping dimensions */ 709 n_B = pcis->n_B; 710 n_D = pcis->n-n_B; 711 712 /* compute nonzero structure of change of basis on all local nodes */ 713 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 714 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 715 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 716 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 718 k=1; 719 for (i=0;i<n_B;i++) { 720 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 721 nnz[is_indices[i]]=j; 722 if (k < j) k = j; 723 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 724 } 725 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 /* assemble change of basis matrix on the whole set of local dofs */ 727 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 728 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 729 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 730 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 731 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 for (i=0;i<n_D;i++) { 734 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 735 } 736 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 738 for (i=0;i<n_B;i++) { 739 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 740 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 741 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 742 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 743 } 744 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 747 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 748 if (i==1) { 749 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 750 } else { 751 Mat work_mat; 752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 753 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 755 } 756 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 757 ierr = PetscFree(nnz);CHKERRQ(ierr); 758 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 759 } else { 760 /* without change of basis, the local matrix is unchanged */ 761 if (!pcbddc->local_mat) { 762 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 763 pcbddc->local_mat = matis->A; 764 } 765 } 766 767 /* get submatrices */ 768 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 769 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 770 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 778 { 779 PC_IS* pcis = (PC_IS*)(pc->data); 780 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 781 IS is_aux1,is_aux2; 782 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 783 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 784 PetscBT bitmask; 785 PetscErrorCode ierr; 786 787 PetscFunctionBegin; 788 /* Set Non-overlapping dimensions */ 789 n_B = pcis->n_B; n_D = pcis->n - n_B; 790 /* get vertex indices from constraint matrix */ 791 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 792 /* Set number of constraints */ 793 n_constraints = pcbddc->local_primal_size-n_vertices; 794 /* create auxiliary bitmask */ 795 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 796 for (i=0;i<n_vertices;i++) { 797 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 798 } 799 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 800 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 801 for (i=0, n_R=0; i<pcis->n; i++) { 802 if (!PetscBTLookup(bitmask,i)) { 803 idx_R_local[n_R] = i; 804 n_R++; 805 } 806 } 807 ierr = PetscFree(vertices);CHKERRQ(ierr); 808 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 809 810 /* print some info if requested */ 811 if (pcbddc->dbg_flag) { 812 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 813 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 818 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 819 } 820 821 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 822 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 823 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 824 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0; i<n_D; i++) { 826 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 827 } 828 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 829 for (i=0, j=0; i<n_R; i++) { 830 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 831 aux_array1[j++] = i; 832 } 833 } 834 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 835 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 836 for (i=0, j=0; i<n_B; i++) { 837 if (!PetscBTLookup(bitmask,is_indices[i])) { 838 aux_array2[j++] = i; 839 } 840 } 841 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 842 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 843 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 844 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 845 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 846 847 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 848 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 849 for (i=0, j=0; i<n_R; i++) { 850 if (PetscBTLookup(bitmask,idx_R_local[i])) { 851 aux_array1[j++] = i; 852 } 853 } 854 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 855 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 856 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 857 } 858 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 864 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 865 { 866 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 867 868 PetscFunctionBegin; 869 pcbddc->use_exact_dirichlet=use; 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 875 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 876 { 877 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 878 PC_IS *pcis = (PC_IS*)pc->data; 879 PC pc_temp; 880 Mat A_RR; 881 Vec vec1,vec2,vec3; 882 MatStructure matstruct; 883 PetscScalar m_one = -1.0; 884 PetscReal value; 885 PetscInt n_D,n_R,use_exact,use_exact_reduced; 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 /* Creating PC contexts for local Dirichlet and Neumann problems */ 890 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 891 892 /* DIRICHLET PROBLEM */ 893 /* Matrix for Dirichlet problem is pcis->A_II */ 894 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 895 if (!pcbddc->ksp_D) { /* create object if not yet build */ 896 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 897 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 898 /* default */ 899 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 900 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 901 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 902 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 903 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 904 } 905 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 906 /* Allow user's customization */ 907 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 908 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 909 if (!n_D) { 910 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 911 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 912 } 913 /* Set Up KSP for Dirichlet problem of BDDC */ 914 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 915 /* set ksp_D into pcis data */ 916 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 917 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 918 pcis->ksp_D = pcbddc->ksp_D; 919 920 /* NEUMANN PROBLEM */ 921 /* Matrix for Neumann problem is A_RR -> we need to create it */ 922 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 923 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 924 if (!pcbddc->ksp_R) { /* create object if not yet build */ 925 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 926 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 927 /* default */ 928 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 929 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 930 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 931 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 932 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 933 } 934 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 935 /* Allow user's customization */ 936 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 937 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 938 if (!n_R) { 939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 940 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 941 } 942 /* Set Up KSP for Neumann problem of BDDC */ 943 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 944 945 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 946 947 /* Dirichlet */ 948 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 949 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 950 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 951 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 952 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 953 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 954 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 955 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 956 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 957 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 958 /* need to be adapted? */ 959 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 960 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 961 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 962 /* print info */ 963 if (pcbddc->dbg_flag) { 964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 969 } 970 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 971 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 972 } 973 974 /* Neumann */ 975 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 976 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 977 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 978 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 979 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 980 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 981 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 982 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 983 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 984 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 985 /* need to be adapted? */ 986 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 987 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 988 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 989 /* print info */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 } 994 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 995 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 996 } 997 998 /* free Neumann problem's matrix */ 999 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1005 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1006 { 1007 PetscErrorCode ierr; 1008 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1009 1010 PetscFunctionBegin; 1011 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1012 if (pcbddc->local_auxmat1) { 1013 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1014 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1015 } 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1021 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1022 { 1023 PetscErrorCode ierr; 1024 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1025 PC_IS* pcis = (PC_IS*) (pc->data); 1026 const PetscScalar zero = 0.0; 1027 1028 PetscFunctionBegin; 1029 /* Application of PHI^T (or PSI^T) */ 1030 if (pcbddc->coarse_psi_B) { 1031 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1032 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1033 } else { 1034 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1035 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1036 } 1037 /* Scatter data of coarse_rhs */ 1038 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1039 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040 1041 /* Local solution on R nodes */ 1042 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1043 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1044 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 if (pcbddc->inexact_prec_type) { 1046 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1048 } 1049 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1050 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1051 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 if (pcbddc->inexact_prec_type) { 1054 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 } 1057 1058 /* Coarse solution */ 1059 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1061 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1062 } 1063 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1065 1066 /* Sum contributions from two levels */ 1067 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1068 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1074 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1075 { 1076 PetscErrorCode ierr; 1077 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1078 1079 PetscFunctionBegin; 1080 switch (pcbddc->coarse_communications_type) { 1081 case SCATTERS_BDDC: 1082 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1083 break; 1084 case GATHERS_BDDC: 1085 break; 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1093 { 1094 PetscErrorCode ierr; 1095 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1096 PetscScalar* array_to; 1097 PetscScalar* array_from; 1098 MPI_Comm comm; 1099 PetscInt i; 1100 1101 PetscFunctionBegin; 1102 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1103 switch (pcbddc->coarse_communications_type) { 1104 case SCATTERS_BDDC: 1105 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1106 break; 1107 case GATHERS_BDDC: 1108 if (vec_from) { 1109 ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); 1110 } 1111 if (vec_to) { 1112 ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); 1113 } 1114 switch(pcbddc->coarse_problem_type){ 1115 case SEQUENTIAL_BDDC: 1116 if (smode == SCATTER_FORWARD) { 1117 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1118 if (vec_to) { 1119 if (imode == ADD_VALUES) { 1120 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1121 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1122 } 1123 } else { 1124 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1125 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1126 } 1127 } 1128 } 1129 } else { 1130 if (vec_from) { 1131 if (imode == ADD_VALUES) { 1132 MPI_Comm vec_from_comm; 1133 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr); 1134 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type); 1135 } 1136 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1137 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1138 } 1139 } 1140 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1141 } 1142 break; 1143 case REPLICATED_BDDC: 1144 if (smode == SCATTER_FORWARD) { 1145 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1146 if (imode == ADD_VALUES) { 1147 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1148 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1149 } 1150 } else { 1151 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1152 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1153 } 1154 } 1155 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1156 if (imode == ADD_VALUES) { 1157 for (i=0;i<pcbddc->local_primal_size;i++) { 1158 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 1159 } 1160 } else { 1161 for (i=0;i<pcbddc->local_primal_size;i++) { 1162 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1163 } 1164 } 1165 } 1166 break; 1167 case MULTILEVEL_BDDC: 1168 break; 1169 case PARALLEL_BDDC: 1170 break; 1171 } 1172 if (vec_from) { 1173 ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); 1174 } 1175 if (vec_to) { 1176 ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); 1177 } 1178 break; 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 /* uncomment for testing purposes */ 1184 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1187 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1188 { 1189 PetscErrorCode ierr; 1190 PC_IS* pcis = (PC_IS*)(pc->data); 1191 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1192 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1193 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1194 MatType impMatType=MATSEQAIJ; 1195 /* one and zero */ 1196 PetscScalar one=1.0,zero=0.0; 1197 /* space to store constraints and their local indices */ 1198 PetscScalar *temp_quadrature_constraint; 1199 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1200 /* iterators */ 1201 PetscInt i,j,k,total_counts,temp_start_ptr; 1202 /* stuff to store connected components stored in pcbddc->mat_graph */ 1203 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1204 PetscInt n_ISForFaces,n_ISForEdges; 1205 PetscBool get_faces,get_edges,get_vertices; 1206 /* near null space stuff */ 1207 MatNullSpace nearnullsp; 1208 const Vec *nearnullvecs; 1209 Vec *localnearnullsp; 1210 PetscBool nnsp_has_cnst; 1211 PetscInt nnsp_size; 1212 PetscScalar *array; 1213 /* BLAS integers */ 1214 PetscBLASInt lwork,lierr; 1215 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1216 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1217 /* LAPACK working arrays for SVD or POD */ 1218 PetscBool skip_lapack; 1219 PetscScalar *work; 1220 PetscReal *singular_vals; 1221 #if defined(PETSC_USE_COMPLEX) 1222 PetscReal *rwork; 1223 #endif 1224 #if defined(PETSC_MISSING_LAPACK_GESVD) 1225 PetscBLASInt Blas_one_2=1; 1226 PetscScalar *temp_basis,*correlation_mat; 1227 #else 1228 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1229 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1230 #endif 1231 /* change of basis */ 1232 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1233 PetscBool boolforchange; 1234 PetscBT touched,change_basis; 1235 /* auxiliary stuff */ 1236 PetscInt *nnz,*is_indices,*local_to_B; 1237 /* some quantities */ 1238 PetscInt n_vertices,total_primal_vertices; 1239 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1240 1241 1242 PetscFunctionBegin; 1243 /* Get index sets for faces, edges and vertices from graph */ 1244 get_faces = PETSC_TRUE; 1245 get_edges = PETSC_TRUE; 1246 get_vertices = PETSC_TRUE; 1247 if (pcbddc->vertices_flag) { 1248 get_faces = PETSC_FALSE; 1249 get_edges = PETSC_FALSE; 1250 } 1251 if (pcbddc->constraints_flag) { 1252 get_vertices = PETSC_FALSE; 1253 } 1254 if (pcbddc->faces_flag) { 1255 get_edges = PETSC_FALSE; 1256 } 1257 if (pcbddc->edges_flag) { 1258 get_faces = PETSC_FALSE; 1259 } 1260 /* default */ 1261 if (!get_faces && !get_edges && !get_vertices) { 1262 get_vertices = PETSC_TRUE; 1263 } 1264 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1265 /* print some info */ 1266 if (pcbddc->dbg_flag) { 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1268 i = 0; 1269 if (ISForVertices) { 1270 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1271 } 1272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1276 } 1277 /* check if near null space is attached to global mat */ 1278 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1279 if (nearnullsp) { 1280 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1281 } else { /* if near null space is not provided BDDC uses constants by default */ 1282 nnsp_size = 0; 1283 nnsp_has_cnst = PETSC_TRUE; 1284 } 1285 /* get max number of constraints on a single cc */ 1286 max_constraints = nnsp_size; 1287 if (nnsp_has_cnst) max_constraints++; 1288 1289 /* 1290 Evaluate maximum storage size needed by the procedure 1291 - temp_indices will contain start index of each constraint stored as follows 1292 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1293 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1294 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1295 */ 1296 total_counts = n_ISForFaces+n_ISForEdges; 1297 total_counts *= max_constraints; 1298 n_vertices = 0; 1299 if (ISForVertices) { 1300 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1301 } 1302 total_counts += n_vertices; 1303 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1304 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1305 total_counts = 0; 1306 max_size_of_constraint = 0; 1307 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1308 if (i<n_ISForEdges) { 1309 used_IS = &ISForEdges[i]; 1310 } else { 1311 used_IS = &ISForFaces[i-n_ISForEdges]; 1312 } 1313 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1314 total_counts += j; 1315 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1316 } 1317 total_counts *= max_constraints; 1318 total_counts += n_vertices; 1319 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1320 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1321 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1322 /* local to boundary numbering */ 1323 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1324 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1326 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1327 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1328 /* get local part of global near null space vectors */ 1329 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1330 for (k=0;k<nnsp_size;k++) { 1331 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1332 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 } 1335 1336 /* whether or not to skip lapack calls */ 1337 skip_lapack = PETSC_TRUE; 1338 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1339 1340 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1341 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1342 PetscScalar temp_work; 1343 #if defined(PETSC_MISSING_LAPACK_GESVD) 1344 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1345 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1346 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1347 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1348 #if defined(PETSC_USE_COMPLEX) 1349 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1350 #endif 1351 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1352 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1353 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1354 lwork = -1; 1355 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1356 #if !defined(PETSC_USE_COMPLEX) 1357 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1358 #else 1359 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1360 #endif 1361 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1362 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1363 #else /* on missing GESVD */ 1364 /* SVD */ 1365 PetscInt max_n,min_n; 1366 max_n = max_size_of_constraint; 1367 min_n = max_constraints; 1368 if (max_size_of_constraint < max_constraints) { 1369 min_n = max_size_of_constraint; 1370 max_n = max_constraints; 1371 } 1372 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1373 #if defined(PETSC_USE_COMPLEX) 1374 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1375 #endif 1376 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1377 lwork = -1; 1378 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1379 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1380 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1381 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1382 #if !defined(PETSC_USE_COMPLEX) 1383 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1384 #else 1385 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1386 #endif 1387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1388 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1389 #endif /* on missing GESVD */ 1390 /* Allocate optimal workspace */ 1391 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1392 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1393 } 1394 /* Now we can loop on constraining sets */ 1395 total_counts = 0; 1396 temp_indices[0] = 0; 1397 /* vertices */ 1398 if (ISForVertices) { 1399 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1400 if (nnsp_has_cnst) { /* consider all vertices */ 1401 for (i=0;i<n_vertices;i++) { 1402 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1403 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1404 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1405 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1406 total_counts++; 1407 } 1408 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1409 PetscBool used_vertex; 1410 for (i=0;i<n_vertices;i++) { 1411 used_vertex = PETSC_FALSE; 1412 k = 0; 1413 while (!used_vertex && k<nnsp_size) { 1414 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1415 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1416 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1417 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1418 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1419 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1420 total_counts++; 1421 used_vertex = PETSC_TRUE; 1422 } 1423 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1424 k++; 1425 } 1426 } 1427 } 1428 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1429 n_vertices = total_counts; 1430 } 1431 1432 /* edges and faces */ 1433 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1434 if (i<n_ISForEdges) { 1435 used_IS = &ISForEdges[i]; 1436 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1437 } else { 1438 used_IS = &ISForFaces[i-n_ISForEdges]; 1439 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1440 } 1441 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1442 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1443 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1444 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1445 /* change of basis should not be performed on local periodic nodes */ 1446 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1447 if (nnsp_has_cnst) { 1448 PetscScalar quad_value; 1449 temp_constraints++; 1450 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1451 for (j=0;j<size_of_constraint;j++) { 1452 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1453 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1454 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1455 } 1456 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1457 if (boolforchange) { 1458 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1459 } 1460 total_counts++; 1461 } 1462 for (k=0;k<nnsp_size;k++) { 1463 PetscReal real_value; 1464 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1465 for (j=0;j<size_of_constraint;j++) { 1466 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1467 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1468 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1469 } 1470 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1471 /* check if array is null on the connected component */ 1472 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1473 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1474 if (real_value > 0.0) { /* keep indices and values */ 1475 temp_constraints++; 1476 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1477 if (boolforchange) { 1478 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1479 } 1480 total_counts++; 1481 } 1482 } 1483 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1484 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1485 if (!pcbddc->use_nnsp_true) { 1486 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1487 1488 #if defined(PETSC_MISSING_LAPACK_GESVD) 1489 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1490 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1491 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1492 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1493 from that computed using LAPACKgesvd 1494 -> This is due to a different computation of eigenvectors in LAPACKheev 1495 -> The quality of the POD-computed basis will be the same */ 1496 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1497 /* Store upper triangular part of correlation matrix */ 1498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1500 for (j=0;j<temp_constraints;j++) { 1501 for (k=0;k<j+1;k++) { 1502 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1503 } 1504 } 1505 /* compute eigenvalues and eigenvectors of correlation matrix */ 1506 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1507 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1508 #if !defined(PETSC_USE_COMPLEX) 1509 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1510 #else 1511 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1512 #endif 1513 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1514 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1515 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1516 j=0; 1517 while (j < temp_constraints && singular_vals[j] < tol) j++; 1518 total_counts=total_counts-j; 1519 /* scale and copy POD basis into used quadrature memory */ 1520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1521 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1522 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1524 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1526 if (j<temp_constraints) { 1527 PetscInt ii; 1528 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1530 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1532 for (k=0;k<temp_constraints-j;k++) { 1533 for (ii=0;ii<size_of_constraint;ii++) { 1534 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1535 } 1536 } 1537 } 1538 #else /* on missing GESVD */ 1539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1540 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1542 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1543 #if !defined(PETSC_USE_COMPLEX) 1544 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1545 #else 1546 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1547 #endif 1548 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1549 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1550 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1551 k = temp_constraints; 1552 if (k > size_of_constraint) k = size_of_constraint; 1553 j = 0; 1554 while (j < k && singular_vals[k-j-1] < tol) j++; 1555 total_counts = total_counts-temp_constraints+k-j; 1556 #endif /* on missing GESVD */ 1557 } 1558 } 1559 /* free index sets of faces, edges and vertices */ 1560 for (i=0;i<n_ISForFaces;i++) { 1561 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1562 } 1563 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1564 for (i=0;i<n_ISForEdges;i++) { 1565 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1566 } 1567 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1568 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1569 1570 /* free workspace */ 1571 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1572 ierr = PetscFree(work);CHKERRQ(ierr); 1573 #if defined(PETSC_USE_COMPLEX) 1574 ierr = PetscFree(rwork);CHKERRQ(ierr); 1575 #endif 1576 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1577 #if defined(PETSC_MISSING_LAPACK_GESVD) 1578 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1579 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1580 #endif 1581 } 1582 for (k=0;k<nnsp_size;k++) { 1583 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1584 } 1585 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1586 1587 /* set quantities in pcbddc data structure */ 1588 /* n_vertices defines the number of subdomain corners in the primal space */ 1589 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1590 pcbddc->local_primal_size = total_counts; 1591 pcbddc->n_vertices = n_vertices; 1592 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1593 1594 /* Create constraint matrix */ 1595 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1596 /* so we need to set it up properly either with or without change of basis */ 1597 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1598 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1599 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1600 /* array to compute a local numbering of constraints : vertices first then constraints */ 1601 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1602 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1603 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1604 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1605 /* auxiliary stuff for basis change */ 1606 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1607 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1608 1609 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1610 total_primal_vertices=0; 1611 for (i=0;i<pcbddc->local_primal_size;i++) { 1612 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1613 if (size_of_constraint == 1) { 1614 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1615 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1616 aux_primal_minloc[total_primal_vertices]=0; 1617 total_primal_vertices++; 1618 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1619 PetscInt min_loc,min_index; 1620 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1621 /* find first untouched local node */ 1622 k = 0; 1623 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1624 min_index = global_indices[k]; 1625 min_loc = k; 1626 /* search the minimum among global nodes already untouched on the cc */ 1627 for (k=1;k<size_of_constraint;k++) { 1628 /* there can be more than one constraint on a single connected component */ 1629 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1630 min_index = global_indices[k]; 1631 min_loc = k; 1632 } 1633 } 1634 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1635 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1636 aux_primal_minloc[total_primal_vertices]=min_loc; 1637 total_primal_vertices++; 1638 } 1639 } 1640 /* free workspace */ 1641 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1642 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1643 /* permute indices in order to have a sorted set of vertices */ 1644 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1645 1646 /* nonzero structure of constraint matrix */ 1647 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1648 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1649 j=total_primal_vertices; 1650 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1651 if (!PetscBTLookup(change_basis,i)) { 1652 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1653 j++; 1654 } 1655 } 1656 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1657 ierr = PetscFree(nnz);CHKERRQ(ierr); 1658 /* set values in constraint matrix */ 1659 for (i=0;i<total_primal_vertices;i++) { 1660 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1661 } 1662 total_counts = total_primal_vertices; 1663 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1664 if (!PetscBTLookup(change_basis,i)) { 1665 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1666 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1667 total_counts++; 1668 } 1669 } 1670 /* assembling */ 1671 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 /* 1674 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1675 */ 1676 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1677 if (pcbddc->use_change_of_basis) { 1678 PetscBool qr_needed = PETSC_FALSE; 1679 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1680 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1681 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1682 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1683 /* work arrays */ 1684 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1685 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1686 /* nonzeros per row */ 1687 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1688 if (PetscBTLookup(change_basis,i)) { 1689 qr_needed = PETSC_TRUE; 1690 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1691 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1692 } 1693 } 1694 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1695 ierr = PetscFree(nnz);CHKERRQ(ierr); 1696 /* Set initial identity in the matrix */ 1697 for (i=0;i<pcis->n_B;i++) { 1698 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1699 } 1700 1701 /* Now we loop on the constraints which need a change of basis */ 1702 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1703 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1704 /* Change of basis matrix T computed via QR decomposition of constraints */ 1705 if (qr_needed) { 1706 /* dual and primal dofs on a single cc */ 1707 PetscInt dual_dofs,primal_dofs; 1708 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1709 PetscInt primal_counter; 1710 /* working stuff for GEQRF */ 1711 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1712 PetscBLASInt lqr_work; 1713 /* working stuff for UNGQR */ 1714 PetscScalar *gqr_work,lgqr_work_t; 1715 PetscBLASInt lgqr_work; 1716 /* working stuff for TRTRS */ 1717 PetscScalar *trs_rhs; 1718 PetscBLASInt Blas_NRHS; 1719 /* pointers for values insertion into change of basis matrix */ 1720 PetscInt *start_rows,*start_cols; 1721 PetscScalar *start_vals; 1722 /* working stuff for values insertion */ 1723 PetscBT is_primal; 1724 1725 /* space to store Q */ 1726 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1727 /* first we issue queries for optimal work */ 1728 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1731 lqr_work = -1; 1732 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1733 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1734 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1735 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1736 lgqr_work = -1; 1737 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1741 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1742 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1743 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1744 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1745 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1746 /* array to store scaling factors for reflectors */ 1747 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1748 /* array to store rhs and solution of triangular solver */ 1749 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1750 /* array to store whether a node is primal or not */ 1751 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1752 for (i=0;i<total_primal_vertices;i++) { 1753 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1754 } 1755 1756 /* allocating workspace for check */ 1757 if (pcbddc->dbg_flag) { 1758 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1759 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1760 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1761 } 1762 1763 /* loop on constraints and see whether or not they need a change of basis */ 1764 /* -> using implicit ordering contained in temp_indices data */ 1765 total_counts = pcbddc->n_vertices; 1766 primal_counter = total_counts; 1767 while (total_counts<pcbddc->local_primal_size) { 1768 primal_dofs = 1; 1769 if (PetscBTLookup(change_basis,total_counts)) { 1770 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1771 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1772 primal_dofs++; 1773 } 1774 /* get constraint info */ 1775 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1776 dual_dofs = size_of_constraint-primal_dofs; 1777 1778 /* copy quadrature constraints for change of basis check */ 1779 if (pcbddc->dbg_flag) { 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1781 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1782 } 1783 1784 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1785 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1786 1787 /* compute QR decomposition of constraints */ 1788 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1789 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1790 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1791 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1792 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1793 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1794 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1795 1796 /* explictly compute R^-T */ 1797 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1798 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1799 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1802 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1804 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1805 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1806 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1807 1808 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1811 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1813 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1814 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1815 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1816 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1817 1818 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1819 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1820 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1822 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1823 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1825 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1828 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1829 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1830 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1831 1832 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1833 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1834 /* insert cols for primal dofs */ 1835 for (j=0;j<primal_dofs;j++) { 1836 start_vals = &qr_basis[j*size_of_constraint]; 1837 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1838 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1839 } 1840 /* insert cols for dual dofs */ 1841 for (j=0,k=0;j<dual_dofs;k++) { 1842 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1843 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1844 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1845 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1846 j++; 1847 } 1848 } 1849 1850 /* check change of basis */ 1851 if (pcbddc->dbg_flag) { 1852 PetscInt ii,jj; 1853 PetscBool valid_qr=PETSC_TRUE; 1854 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1859 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1860 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1861 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1862 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1863 for (jj=0;jj<size_of_constraint;jj++) { 1864 for (ii=0;ii<primal_dofs;ii++) { 1865 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1866 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1867 } 1868 } 1869 if (!valid_qr) { 1870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1871 for (jj=0;jj<size_of_constraint;jj++) { 1872 for (ii=0;ii<primal_dofs;ii++) { 1873 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1874 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1875 } 1876 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1877 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1878 } 1879 } 1880 } 1881 } else { 1882 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1883 } 1884 } 1885 /* increment primal counter */ 1886 primal_counter += primal_dofs; 1887 } else { 1888 if (pcbddc->dbg_flag) { 1889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1890 } 1891 } 1892 /* increment constraint counter total_counts */ 1893 total_counts += primal_dofs; 1894 } 1895 if (pcbddc->dbg_flag) { 1896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1897 ierr = PetscFree(work);CHKERRQ(ierr); 1898 } 1899 /* free workspace */ 1900 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1901 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1902 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1903 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1904 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1905 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1906 } 1907 /* assembling */ 1908 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1909 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1910 /* 1911 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1912 */ 1913 } 1914 /* free workspace */ 1915 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1916 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1917 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1918 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1919 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1920 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1921 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1922 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1928 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1929 { 1930 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1931 PC_IS *pcis = (PC_IS*)pc->data; 1932 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1933 PetscInt bs,ierr,i,vertex_size; 1934 PetscViewer viewer=pcbddc->dbg_viewer; 1935 1936 PetscFunctionBegin; 1937 /* Init local Graph struct */ 1938 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1939 1940 /* Check validity of the csr graph passed in by the user */ 1941 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1942 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1943 } 1944 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1945 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1946 Mat mat_adj; 1947 const PetscInt *xadj,*adjncy; 1948 PetscBool flg_row=PETSC_TRUE; 1949 1950 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1951 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1952 if (!flg_row) { 1953 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1954 } 1955 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1956 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1957 if (!flg_row) { 1958 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1959 } 1960 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1961 } 1962 1963 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1964 vertex_size = 1; 1965 if (!pcbddc->n_ISForDofs) { 1966 IS *custom_ISForDofs; 1967 1968 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1969 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1970 for (i=0;i<bs;i++) { 1971 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1972 } 1973 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1974 /* remove my references to IS objects */ 1975 for (i=0;i<bs;i++) { 1976 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1977 } 1978 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1979 } else { /* mat block size as vertex size (used for elasticity) */ 1980 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1981 } 1982 1983 /* Setup of Graph */ 1984 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1985 1986 /* Graph's connected components analysis */ 1987 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1988 1989 /* print some info to stdout */ 1990 if (pcbddc->dbg_flag) { 1991 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1998 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1999 { 2000 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2001 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2002 PetscErrorCode ierr; 2003 2004 PetscFunctionBegin; 2005 n = 0; 2006 vertices = 0; 2007 if (pcbddc->ConstraintMatrix) { 2008 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2009 for (i=0;i<local_primal_size;i++) { 2010 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2011 if (size_of_constraint == 1) n++; 2012 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2013 } 2014 if (vertices_idx) { 2015 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2016 n = 0; 2017 for (i=0;i<local_primal_size;i++) { 2018 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2019 if (size_of_constraint == 1) { 2020 vertices[n++]=row_cmat_indices[0]; 2021 } 2022 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2023 } 2024 } 2025 } 2026 *n_vertices = n; 2027 if (vertices_idx) *vertices_idx = vertices; 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2033 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 2034 { 2035 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2036 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2037 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2038 PetscBT touched; 2039 PetscErrorCode ierr; 2040 2041 PetscFunctionBegin; 2042 n = 0; 2043 constraints_index = 0; 2044 if (pcbddc->ConstraintMatrix) { 2045 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2046 max_size_of_constraint = 0; 2047 for (i=0;i<local_primal_size;i++) { 2048 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2049 if (size_of_constraint > 1) { 2050 n++; 2051 } 2052 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2053 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2054 } 2055 if (constraints_idx) { 2056 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2057 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2058 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2059 n = 0; 2060 for (i=0;i<local_primal_size;i++) { 2061 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2062 if (size_of_constraint > 1) { 2063 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2064 /* find first untouched local node */ 2065 j = 0; 2066 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2067 min_index = row_cmat_global_indices[j]; 2068 min_loc = j; 2069 /* search the minimum among nodes not yet touched on the connected component 2070 since there can be more than one constraint on a single cc */ 2071 for (j=1;j<size_of_constraint;j++) { 2072 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2073 min_index = row_cmat_global_indices[j]; 2074 min_loc = j; 2075 } 2076 } 2077 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2078 constraints_index[n++] = row_cmat_indices[min_loc]; 2079 } 2080 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2081 } 2082 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2083 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2084 } 2085 } 2086 *n_constraints = n; 2087 if (constraints_idx) *constraints_idx = constraints_index; 2088 PetscFunctionReturn(0); 2089 } 2090 2091 /* the next two functions has been adapted from pcis.c */ 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "PCBDDCApplySchur" 2094 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2095 { 2096 PetscErrorCode ierr; 2097 PC_IS *pcis = (PC_IS*)(pc->data); 2098 2099 PetscFunctionBegin; 2100 if (!vec2_B) { vec2_B = v; } 2101 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2102 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2103 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2104 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2105 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2106 PetscFunctionReturn(0); 2107 } 2108 2109 #undef __FUNCT__ 2110 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2111 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2112 { 2113 PetscErrorCode ierr; 2114 PC_IS *pcis = (PC_IS*)(pc->data); 2115 2116 PetscFunctionBegin; 2117 if (!vec2_B) { vec2_B = v; } 2118 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2119 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2120 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2121 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2122 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "PCBDDCSubsetNumbering" 2128 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2129 { 2130 Vec local_vec,global_vec; 2131 IS seqis,paris; 2132 VecScatter scatter_ctx; 2133 PetscScalar *array; 2134 PetscInt *temp_global_dofs; 2135 PetscScalar globalsum; 2136 PetscInt i,j,s; 2137 PetscInt nlocals,first_index,old_index,max_local; 2138 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2139 PetscMPIInt *dof_sizes,*dof_displs; 2140 PetscBool first_found; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 /* mpi buffers */ 2145 MPI_Comm_size(comm,&size_prec_comm); 2146 MPI_Comm_rank(comm,&rank_prec_comm); 2147 j = ( !rank_prec_comm ? size_prec_comm : 0); 2148 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2149 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2150 /* get maximum size of subset */ 2151 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2152 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2153 max_local = 0; 2154 if (n_local_dofs) { 2155 max_local = temp_global_dofs[0]; 2156 for (i=1;i<n_local_dofs;i++) { 2157 if (max_local < temp_global_dofs[i] ) { 2158 max_local = temp_global_dofs[i]; 2159 } 2160 } 2161 } 2162 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2163 max_global++; 2164 max_local = 0; 2165 if (n_local_dofs) { 2166 max_local = local_dofs[0]; 2167 for (i=1;i<n_local_dofs;i++) { 2168 if (max_local < local_dofs[i] ) { 2169 max_local = local_dofs[i]; 2170 } 2171 } 2172 } 2173 max_local++; 2174 /* allocate workspace */ 2175 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2176 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2177 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2178 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2179 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2180 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2181 /* create scatter */ 2182 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2183 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2184 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2185 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2186 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2187 /* init array */ 2188 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2189 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2190 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2191 if (local_dofs_mult) { 2192 for (i=0;i<n_local_dofs;i++) { 2193 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2194 } 2195 } else { 2196 for (i=0;i<n_local_dofs;i++) { 2197 array[local_dofs[i]]=1.0; 2198 } 2199 } 2200 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2201 /* scatter into global vec and get total number of global dofs */ 2202 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2203 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2204 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2205 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2206 /* Fill global_vec with cumulative function for global numbering */ 2207 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2208 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2209 nlocals = 0; 2210 first_index = -1; 2211 first_found = PETSC_FALSE; 2212 for (i=0;i<s;i++) { 2213 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2214 first_found = PETSC_TRUE; 2215 first_index = i; 2216 } 2217 nlocals += (PetscInt)PetscRealPart(array[i]); 2218 } 2219 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2220 if (!rank_prec_comm) { 2221 dof_displs[0]=0; 2222 for (i=1;i<size_prec_comm;i++) { 2223 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2224 } 2225 } 2226 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2227 if (first_found) { 2228 array[first_index] += (PetscScalar)nlocals; 2229 old_index = first_index; 2230 for (i=first_index+1;i<s;i++) { 2231 if (PetscRealPart(array[i]) > 0.0) { 2232 array[i] += array[old_index]; 2233 old_index = i; 2234 } 2235 } 2236 } 2237 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2238 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2239 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2240 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2241 /* get global ordering of local dofs */ 2242 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2243 if (local_dofs_mult) { 2244 for (i=0;i<n_local_dofs;i++) { 2245 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2246 } 2247 } else { 2248 for (i=0;i<n_local_dofs;i++) { 2249 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2250 } 2251 } 2252 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2253 /* free workspace */ 2254 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2255 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2256 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2257 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2258 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2259 /* return pointer to global ordering of local dofs */ 2260 *global_numbering_subset = temp_global_dofs; 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2266 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2267 { 2268 PetscInt i,j; 2269 PetscScalar *alphas; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 /* this implements stabilized Gram-Schmidt */ 2274 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2275 for (i=0;i<n;i++) { 2276 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2277 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2278 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2279 } 2280 ierr = PetscFree(alphas);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 /* BDDC requires metis 5.0.1 for multilevel */ 2285 #if defined(PETSC_HAVE_METIS) 2286 #include "metis.h" 2287 #define MetisInt idx_t 2288 #define MetisScalar real_t 2289 #endif 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment" 2293 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2294 { 2295 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2296 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2297 PC_IS *pcis = (PC_IS*)pc->data; 2298 MPI_Comm prec_comm; 2299 MPI_Comm coarse_comm; 2300 2301 PetscInt coarse_size; 2302 2303 MatNullSpace CoarseNullSpace; 2304 2305 /* common to all choiches */ 2306 PetscScalar *temp_coarse_mat_vals; 2307 PetscScalar *ins_coarse_mat_vals; 2308 PetscInt *ins_local_primal_indices; 2309 PetscMPIInt *localsizes2,*localdispl2; 2310 PetscMPIInt size_prec_comm; 2311 PetscMPIInt rank_prec_comm; 2312 PetscMPIInt active_rank=MPI_PROC_NULL; 2313 PetscMPIInt master_proc=0; 2314 PetscInt ins_local_primal_size; 2315 /* specific to MULTILEVEL_BDDC */ 2316 PetscMPIInt *ranks_recv=0; 2317 PetscMPIInt count_recv=0; 2318 PetscMPIInt rank_coarse_proc_send_to=-1; 2319 PetscMPIInt coarse_color = MPI_UNDEFINED; 2320 ISLocalToGlobalMapping coarse_ISLG; 2321 /* some other variables */ 2322 PetscErrorCode ierr; 2323 MatType coarse_mat_type; 2324 PCType coarse_pc_type; 2325 KSPType coarse_ksp_type; 2326 PC pc_temp; 2327 PetscInt i,j,k; 2328 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2329 /* verbose output viewer */ 2330 PetscViewer viewer=pcbddc->dbg_viewer; 2331 PetscInt dbg_flag=pcbddc->dbg_flag; 2332 2333 PetscInt offset,offset2; 2334 PetscMPIInt im_active,active_procs; 2335 PetscInt *dnz,*onz; 2336 2337 PetscBool setsym,issym=PETSC_FALSE; 2338 2339 PetscFunctionBegin; 2340 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2341 ins_local_primal_indices = 0; 2342 ins_coarse_mat_vals = 0; 2343 localsizes2 = 0; 2344 localdispl2 = 0; 2345 temp_coarse_mat_vals = 0; 2346 coarse_ISLG = 0; 2347 2348 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2349 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2350 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2351 2352 /* Assign global numbering to coarse dofs */ 2353 { 2354 PetscInt *auxlocal_primal,*aux_idx; 2355 PetscMPIInt mpi_local_primal_size; 2356 PetscScalar coarsesum,*array; 2357 2358 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2359 2360 /* Construct needed data structures for message passing */ 2361 j = 0; 2362 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2363 j = size_prec_comm; 2364 } 2365 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2366 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2367 /* Gather local_primal_size information for all processes */ 2368 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2369 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2370 } else { 2371 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2372 } 2373 pcbddc->replicated_primal_size = 0; 2374 for (i=0; i<j; i++) { 2375 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2376 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2377 } 2378 2379 /* This code fragment assumes that the number of local constraints per connected component 2380 is not greater than the number of nodes defined for the connected component 2381 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2382 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2383 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2384 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2385 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2386 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2387 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2388 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2389 /* Compute number of coarse dofs */ 2390 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2391 2392 if (dbg_flag) { 2393 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2394 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2395 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2396 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2397 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2398 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2399 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2400 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2401 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2402 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2403 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2404 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2405 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2406 for (i=0;i<pcis->n;i++) { 2407 if (array[i] == 1.0) { 2408 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2409 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2410 } 2411 } 2412 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2413 for (i=0;i<pcis->n;i++) { 2414 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2415 } 2416 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2417 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2418 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2419 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2420 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2421 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2423 } 2424 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2425 } 2426 2427 if (dbg_flag) { 2428 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2429 if (dbg_flag > 1) { 2430 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2432 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2433 for (i=0;i<pcbddc->local_primal_size;i++) { 2434 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2435 } 2436 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2437 } 2438 } 2439 2440 im_active = 0; 2441 if (pcis->n) im_active = 1; 2442 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2443 2444 /* adapt coarse problem type */ 2445 #if defined(PETSC_HAVE_METIS) 2446 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2447 if (pcbddc->current_level < pcbddc->max_levels) { 2448 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2449 if (dbg_flag) { 2450 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2451 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2452 } 2453 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2454 } 2455 } else { 2456 if (dbg_flag) { 2457 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2458 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2459 } 2460 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2461 } 2462 } 2463 #else 2464 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2465 #endif 2466 2467 switch(pcbddc->coarse_problem_type){ 2468 2469 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2470 { 2471 #if defined(PETSC_HAVE_METIS) 2472 /* we need additional variables */ 2473 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2474 MetisInt *metis_coarse_subdivision; 2475 MetisInt options[METIS_NOPTIONS]; 2476 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2477 PetscMPIInt procs_jumps_coarse_comm; 2478 PetscMPIInt *coarse_subdivision; 2479 PetscMPIInt *total_count_recv; 2480 PetscMPIInt *total_ranks_recv; 2481 PetscMPIInt *displacements_recv; 2482 PetscMPIInt *my_faces_connectivity; 2483 PetscMPIInt *petsc_faces_adjncy; 2484 MetisInt *faces_adjncy; 2485 MetisInt *faces_xadj; 2486 PetscMPIInt *number_of_faces; 2487 PetscMPIInt *faces_displacements; 2488 PetscInt *array_int; 2489 PetscMPIInt my_faces=0; 2490 PetscMPIInt total_faces=0; 2491 PetscInt ranks_stretching_ratio; 2492 2493 /* define some quantities */ 2494 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2495 coarse_mat_type = MATIS; 2496 coarse_pc_type = PCBDDC; 2497 coarse_ksp_type = KSPRICHARDSON; 2498 2499 /* details of coarse decomposition */ 2500 n_subdomains = active_procs; 2501 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2502 ranks_stretching_ratio = size_prec_comm/active_procs; 2503 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2504 2505 #if 0 2506 PetscMPIInt *old_ranks; 2507 PetscInt *new_ranks,*jj,*ii; 2508 MatPartitioning mat_part; 2509 IS coarse_new_decomposition,is_numbering; 2510 PetscViewer viewer_test; 2511 MPI_Comm test_coarse_comm; 2512 PetscMPIInt test_coarse_color; 2513 Mat mat_adj; 2514 /* Create new communicator for coarse problem splitting the old one */ 2515 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2516 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2517 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2518 test_coarse_comm = MPI_COMM_NULL; 2519 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2520 if (im_active) { 2521 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2522 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2523 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2524 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2525 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2526 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2527 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2528 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2529 k = pcis->n_neigh-1; 2530 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2531 ii[0]=0; 2532 ii[1]=k; 2533 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2534 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2535 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2536 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2537 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2538 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2539 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2540 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2541 printf("Setting Nparts %d\n",n_parts); 2542 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2543 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2544 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2545 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2546 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2547 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2548 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2549 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2550 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2551 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2552 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2553 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2554 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2555 } 2556 #endif 2557 2558 /* build CSR graph of subdomains' connectivity */ 2559 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2560 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2561 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2562 for (j=0;j<pcis->n_shared[i];j++){ 2563 array_int[ pcis->shared[i][j] ]+=1; 2564 } 2565 } 2566 for (i=1;i<pcis->n_neigh;i++){ 2567 for (j=0;j<pcis->n_shared[i];j++){ 2568 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2569 my_faces++; 2570 break; 2571 } 2572 } 2573 } 2574 2575 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2576 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2577 my_faces=0; 2578 for (i=1;i<pcis->n_neigh;i++){ 2579 for (j=0;j<pcis->n_shared[i];j++){ 2580 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2581 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2582 my_faces++; 2583 break; 2584 } 2585 } 2586 } 2587 if (rank_prec_comm == master_proc) { 2588 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2589 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2590 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2591 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2592 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2593 } 2594 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2595 if (rank_prec_comm == master_proc) { 2596 faces_xadj[0]=0; 2597 faces_displacements[0]=0; 2598 j=0; 2599 for (i=1;i<size_prec_comm+1;i++) { 2600 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2601 if (number_of_faces[i-1]) { 2602 j++; 2603 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2604 } 2605 } 2606 } 2607 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2608 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2609 ierr = PetscFree(array_int);CHKERRQ(ierr); 2610 if (rank_prec_comm == master_proc) { 2611 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2612 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2613 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2614 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2615 } 2616 2617 if ( rank_prec_comm == master_proc ) { 2618 2619 PetscInt heuristic_for_metis=3; 2620 2621 ncon=1; 2622 faces_nvtxs=n_subdomains; 2623 /* partition graoh induced by face connectivity */ 2624 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2625 ierr = METIS_SetDefaultOptions(options); 2626 /* we need a contiguous partition of the coarse mesh */ 2627 options[METIS_OPTION_CONTIG]=1; 2628 options[METIS_OPTION_NITER]=30; 2629 if (pcbddc->coarsening_ratio > 1) { 2630 if (n_subdomains>n_parts*heuristic_for_metis) { 2631 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2632 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2633 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2634 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2635 } else { 2636 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2637 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2638 } 2639 } else { 2640 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2641 } 2642 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2643 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2644 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2645 2646 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2647 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2648 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2649 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2650 } 2651 2652 /* Create new communicator for coarse problem splitting the old one */ 2653 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2654 coarse_color=0; /* for communicator splitting */ 2655 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2656 } 2657 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2658 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2659 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2660 2661 if ( coarse_color == 0 ) { 2662 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2663 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2664 } else { 2665 rank_coarse_comm = MPI_PROC_NULL; 2666 } 2667 2668 /* master proc take care of arranging and distributing coarse information */ 2669 if (rank_coarse_comm == master_proc) { 2670 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2671 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2672 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2673 /* some initializations */ 2674 displacements_recv[0]=0; 2675 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2676 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2677 for (j=0;j<size_coarse_comm;j++) { 2678 for (i=0;i<size_prec_comm;i++) { 2679 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2680 } 2681 } 2682 /* displacements needed for scatterv of total_ranks_recv */ 2683 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2684 2685 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2686 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2687 for (j=0;j<size_coarse_comm;j++) { 2688 for (i=0;i<size_prec_comm;i++) { 2689 if (coarse_subdivision[i]==j) { 2690 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2691 total_count_recv[j]+=1; 2692 } 2693 } 2694 } 2695 /*for (j=0;j<size_coarse_comm;j++) { 2696 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2697 for (i=0;i<total_count_recv[j];i++) { 2698 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2699 } 2700 printf("\n"); 2701 }*/ 2702 2703 /* identify new decomposition in terms of ranks in the old communicator */ 2704 for (i=0;i<n_subdomains;i++) { 2705 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2706 } 2707 /*printf("coarse_subdivision in old end new ranks\n"); 2708 for (i=0;i<size_prec_comm;i++) 2709 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2710 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2711 } else { 2712 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2713 } 2714 printf("\n");*/ 2715 } 2716 2717 /* Scatter new decomposition for send details */ 2718 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2719 /* Scatter receiving details to members of coarse decomposition */ 2720 if ( coarse_color == 0) { 2721 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2722 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2723 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2724 } 2725 2726 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2727 if (coarse_color == 0) { 2728 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2729 for (i=0;i<count_recv;i++) 2730 printf("%d ",ranks_recv[i]); 2731 printf("\n"); 2732 }*/ 2733 2734 if (rank_prec_comm == master_proc) { 2735 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2736 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2737 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2738 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2739 } 2740 #endif 2741 break; 2742 } 2743 2744 case(REPLICATED_BDDC): 2745 2746 pcbddc->coarse_communications_type = GATHERS_BDDC; 2747 coarse_mat_type = MATSEQAIJ; 2748 coarse_pc_type = PCLU; 2749 coarse_ksp_type = KSPPREONLY; 2750 coarse_comm = PETSC_COMM_SELF; 2751 active_rank = rank_prec_comm; 2752 break; 2753 2754 case(PARALLEL_BDDC): 2755 2756 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2757 coarse_mat_type = MATAIJ; 2758 coarse_pc_type = PCREDUNDANT; 2759 coarse_ksp_type = KSPPREONLY; 2760 coarse_comm = prec_comm; 2761 active_rank = rank_prec_comm; 2762 break; 2763 2764 case(SEQUENTIAL_BDDC): 2765 pcbddc->coarse_communications_type = GATHERS_BDDC; 2766 coarse_mat_type = MATAIJ; 2767 coarse_pc_type = PCLU; 2768 coarse_ksp_type = KSPPREONLY; 2769 coarse_comm = PETSC_COMM_SELF; 2770 active_rank = master_proc; 2771 break; 2772 } 2773 2774 switch(pcbddc->coarse_communications_type){ 2775 2776 case(SCATTERS_BDDC): 2777 { 2778 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2779 2780 IS coarse_IS; 2781 2782 if(pcbddc->coarsening_ratio == 1) { 2783 ins_local_primal_size = pcbddc->local_primal_size; 2784 ins_local_primal_indices = pcbddc->local_primal_indices; 2785 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2786 /* nonzeros */ 2787 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2788 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2789 for (i=0;i<ins_local_primal_size;i++) { 2790 dnz[i] = ins_local_primal_size; 2791 } 2792 } else { 2793 PetscMPIInt send_size; 2794 PetscMPIInt *send_buffer; 2795 PetscInt *aux_ins_indices; 2796 PetscInt ii,jj; 2797 MPI_Request *requests; 2798 2799 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2800 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2801 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2802 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2803 pcbddc->replicated_primal_size = count_recv; 2804 j = 0; 2805 for (i=0;i<count_recv;i++) { 2806 pcbddc->local_primal_displacements[i] = j; 2807 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2808 } 2809 pcbddc->local_primal_displacements[count_recv] = j; 2810 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2811 /* allocate auxiliary space */ 2812 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2813 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2814 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2815 /* allocate stuffs for message massing */ 2816 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2817 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2818 /* send indices to be inserted */ 2819 for (i=0;i<count_recv;i++) { 2820 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2821 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2822 } 2823 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2824 send_size = pcbddc->local_primal_size; 2825 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2826 for (i=0;i<send_size;i++) { 2827 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2828 } 2829 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2830 } 2831 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2832 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2833 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2834 } 2835 j = 0; 2836 for (i=0;i<count_recv;i++) { 2837 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2838 localsizes2[i] = ii*ii; 2839 localdispl2[i] = j; 2840 j += localsizes2[i]; 2841 jj = pcbddc->local_primal_displacements[i]; 2842 /* it counts the coarse subdomains sharing the coarse node */ 2843 for (k=0;k<ii;k++) { 2844 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2845 } 2846 } 2847 /* temp_coarse_mat_vals used to store matrix values to be received */ 2848 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2849 /* evaluate how many values I will insert in coarse mat */ 2850 ins_local_primal_size = 0; 2851 for (i=0;i<coarse_size;i++) { 2852 if (aux_ins_indices[i]) { 2853 ins_local_primal_size++; 2854 } 2855 } 2856 /* evaluate indices I will insert in coarse mat */ 2857 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2858 j = 0; 2859 for(i=0;i<coarse_size;i++) { 2860 if(aux_ins_indices[i]) { 2861 ins_local_primal_indices[j] = i; 2862 j++; 2863 } 2864 } 2865 /* processes partecipating in coarse problem receive matrix data from their friends */ 2866 for (i=0;i<count_recv;i++) { 2867 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2868 } 2869 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2870 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2871 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2872 } 2873 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2874 /* nonzeros */ 2875 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2876 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2877 /* use aux_ins_indices to realize a global to local mapping */ 2878 j=0; 2879 for(i=0;i<coarse_size;i++){ 2880 if(aux_ins_indices[i]==0){ 2881 aux_ins_indices[i]=-1; 2882 } else { 2883 aux_ins_indices[i]=j; 2884 j++; 2885 } 2886 } 2887 for (i=0;i<count_recv;i++) { 2888 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2889 for (k=0;k<j;k++) { 2890 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2891 } 2892 } 2893 /* check */ 2894 for (i=0;i<ins_local_primal_size;i++) { 2895 if (dnz[i] > ins_local_primal_size) { 2896 dnz[i] = ins_local_primal_size; 2897 } 2898 } 2899 ierr = PetscFree(requests);CHKERRQ(ierr); 2900 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2901 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2902 } 2903 /* create local to global mapping needed by coarse MATIS */ 2904 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2905 coarse_comm = prec_comm; 2906 active_rank = rank_prec_comm; 2907 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2908 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2909 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2910 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2911 /* arrays for values insertion */ 2912 ins_local_primal_size = pcbddc->local_primal_size; 2913 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2914 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2915 for (j=0;j<ins_local_primal_size;j++){ 2916 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2917 for (i=0;i<ins_local_primal_size;i++) { 2918 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2919 } 2920 } 2921 } 2922 break; 2923 2924 } 2925 2926 case(GATHERS_BDDC): 2927 { 2928 2929 PetscMPIInt mysize,mysize2; 2930 PetscMPIInt *send_buffer; 2931 2932 if (rank_prec_comm==active_rank) { 2933 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2934 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 2935 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2936 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2937 /* arrays for values insertion */ 2938 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2939 localdispl2[0]=0; 2940 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2941 j=0; 2942 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2943 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2944 } 2945 2946 mysize=pcbddc->local_primal_size; 2947 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2948 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2949 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2950 2951 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2952 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2953 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2954 } else { 2955 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2956 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2957 } 2958 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2959 break; 2960 }/* switch on coarse problem and communications associated with finished */ 2961 } 2962 2963 /* Now create and fill up coarse matrix */ 2964 if ( rank_prec_comm == active_rank ) { 2965 2966 Mat matis_coarse_local_mat; 2967 2968 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2969 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2970 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2971 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2972 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2973 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2974 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2975 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2976 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2977 } else { 2978 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2979 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2980 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2981 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2982 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2983 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2984 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2985 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2986 } 2987 /* preallocation */ 2988 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2989 2990 PetscInt lrows,lcols,bs; 2991 2992 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2993 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2994 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 2995 2996 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2997 2998 Vec vec_dnz,vec_onz; 2999 PetscScalar *my_dnz,*my_onz,*array; 3000 PetscInt *mat_ranges,*row_ownership; 3001 PetscInt coarse_index_row,coarse_index_col,owner; 3002 3003 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 3004 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 3005 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 3006 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 3007 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 3008 3009 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 3010 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 3011 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3012 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3013 3014 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 3015 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 3016 for (i=0;i<size_prec_comm;i++) { 3017 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 3018 row_ownership[j]=i; 3019 } 3020 } 3021 3022 for (i=0;i<pcbddc->local_primal_size;i++) { 3023 coarse_index_row = pcbddc->local_primal_indices[i]; 3024 owner = row_ownership[coarse_index_row]; 3025 for (j=i;j<pcbddc->local_primal_size;j++) { 3026 owner = row_ownership[coarse_index_row]; 3027 coarse_index_col = pcbddc->local_primal_indices[j]; 3028 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3029 my_dnz[i] += 1.0; 3030 } else { 3031 my_onz[i] += 1.0; 3032 } 3033 if (i != j) { 3034 owner = row_ownership[coarse_index_col]; 3035 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3036 my_dnz[j] += 1.0; 3037 } else { 3038 my_onz[j] += 1.0; 3039 } 3040 } 3041 } 3042 } 3043 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3044 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3045 if (pcbddc->local_primal_size) { 3046 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3047 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3048 } 3049 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3050 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3051 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3052 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3053 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3054 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3055 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3056 3057 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3058 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3059 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3060 3061 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3062 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3063 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3064 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3065 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3066 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3067 } else { 3068 for (k=0;k<size_prec_comm;k++){ 3069 offset=pcbddc->local_primal_displacements[k]; 3070 offset2=localdispl2[k]; 3071 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3072 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3073 for (j=0;j<ins_local_primal_size;j++){ 3074 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3075 } 3076 for (j=0;j<ins_local_primal_size;j++) { 3077 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3078 } 3079 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3080 } 3081 } 3082 3083 /* check */ 3084 for (i=0;i<lrows;i++) { 3085 if (dnz[i]>lcols) dnz[i]=lcols; 3086 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 3087 } 3088 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 3089 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3090 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3091 } else { 3092 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3093 ierr = PetscFree(dnz);CHKERRQ(ierr); 3094 } 3095 /* insert values */ 3096 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3097 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3098 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3099 if (pcbddc->coarsening_ratio == 1) { 3100 ins_coarse_mat_vals = coarse_submat_vals; 3101 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3102 } else { 3103 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3104 for (k=0;k<pcbddc->replicated_primal_size;k++) { 3105 offset = pcbddc->local_primal_displacements[k]; 3106 offset2 = localdispl2[k]; 3107 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 3108 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3109 for (j=0;j<ins_local_primal_size;j++){ 3110 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3111 } 3112 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3113 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3114 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3115 } 3116 } 3117 ins_local_primal_indices = 0; 3118 ins_coarse_mat_vals = 0; 3119 } else { 3120 for (k=0;k<size_prec_comm;k++){ 3121 offset=pcbddc->local_primal_displacements[k]; 3122 offset2=localdispl2[k]; 3123 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3124 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3125 for (j=0;j<ins_local_primal_size;j++){ 3126 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3127 } 3128 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3129 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3130 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3131 } 3132 ins_local_primal_indices = 0; 3133 ins_coarse_mat_vals = 0; 3134 } 3135 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3136 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3137 /* symmetry of coarse matrix */ 3138 if (issym) { 3139 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3140 } 3141 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3142 } 3143 3144 /* create loc to glob scatters if needed */ 3145 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3146 IS local_IS,global_IS; 3147 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3148 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3149 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3150 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3151 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3152 } 3153 3154 /* free memory no longer needed */ 3155 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3156 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3157 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3158 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3159 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3160 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3161 3162 /* Compute coarse null space */ 3163 CoarseNullSpace = 0; 3164 if (pcbddc->NullSpace) { 3165 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3166 } 3167 3168 /* KSP for coarse problem */ 3169 if (rank_prec_comm == active_rank) { 3170 PetscBool isbddc=PETSC_FALSE; 3171 3172 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3173 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3174 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3175 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3176 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3177 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3178 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3179 /* Allow user's customization */ 3180 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3181 /* Set Up PC for coarse problem BDDC */ 3182 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3183 i = pcbddc->current_level+1; 3184 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3185 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3186 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3187 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3188 if (CoarseNullSpace) { 3189 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3190 } 3191 if (dbg_flag) { 3192 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3194 } 3195 } else { 3196 if (CoarseNullSpace) { 3197 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3198 } 3199 } 3200 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3201 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3202 3203 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3204 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3205 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3206 if (j == 1) { 3207 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3208 if (isbddc) { 3209 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3210 } 3211 } 3212 } 3213 /* Check coarse problem if requested */ 3214 if ( dbg_flag && rank_prec_comm == active_rank ) { 3215 KSP check_ksp; 3216 PC check_pc; 3217 Vec check_vec; 3218 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3219 KSPType check_ksp_type; 3220 3221 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3222 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3223 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3224 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3225 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3226 if (issym) check_ksp_type = KSPCG; 3227 else check_ksp_type = KSPGMRES; 3228 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3229 } else { 3230 check_ksp_type = KSPPREONLY; 3231 } 3232 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3233 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3234 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3235 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3236 /* create random vec */ 3237 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3238 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3239 if (CoarseNullSpace) { 3240 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3241 } 3242 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3243 /* solve coarse problem */ 3244 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3245 if (CoarseNullSpace) { 3246 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3247 } 3248 /* check coarse problem residual error */ 3249 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3250 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3251 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3252 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3253 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3254 /* get eigenvalue estimation if inexact */ 3255 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3256 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3257 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3258 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3259 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3260 } 3261 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3262 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3263 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3264 } 3265 if (dbg_flag) { 3266 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3267 } 3268 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3269 3270 PetscFunctionReturn(0); 3271 } 3272 3273