1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscErrorCode ierr; 11 12 PetscFunctionBegin; 13 /* Compute matrix after change of basis and extract local submatrices */ 14 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 15 16 /* Allocate needed vectors */ 17 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 18 19 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 20 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 21 22 /* Setup local solvers ksp_D and ksp_R */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* setup local correction and local part of coarse basis */ 31 ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCSetLevel" 37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 38 { 39 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 40 41 PetscFunctionBegin; 42 pcbddc->current_level=level; 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "PCBDDCResetCustomization" 48 PetscErrorCode PCBDDCResetCustomization(PC pc) 49 { 50 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 51 PetscInt i; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 60 for (i=0;i<pcbddc->n_ISForDofs;i++) { 61 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 62 } 63 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCCreateWorkVectors" 118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 128 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 129 local_primal_size = n_constraints+n_vertices; 130 n_R = pcis->n-n_vertices; 131 /* local work vectors */ 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 137 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 138 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 139 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 140 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 141 if (n_constraints) { 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 145 } 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc) 152 { 153 PetscErrorCode ierr; 154 /* pointers to pcis and pcbddc */ 155 PC_IS* pcis = (PC_IS*)pc->data; 156 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 157 /* submatrices of local problem */ 158 Mat A_RV,A_VR,A_VV; 159 /* working matrices */ 160 Mat M1,M2,M3,C_CR; 161 /* working vectors */ 162 Vec vec1_C,vec2_C,vec1_V,vec2_V; 163 /* additional working stuff */ 164 IS is_aux; 165 ISLocalToGlobalMapping BtoNmap; 166 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 167 const PetscScalar *array,*row_cmat_values; 168 const PetscInt *row_cmat_indices,*idx_R_local; 169 PetscInt *vertices,*idx_V_B,*auxindices; 170 PetscInt n_vertices,n_constraints,size_of_constraint; 171 PetscInt i,j,n_R,n_D,n_B; 172 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 173 /* Vector and matrix types */ 174 VecType impVecType; 175 MatType impMatType; 176 /* some shortcuts to scalars */ 177 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 178 /* for debugging purposes */ 179 PetscReal *coarsefunctions_errors,*constraints_errors; 180 181 PetscFunctionBegin; 182 /* get number of vertices and their local indices */ 183 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 184 n_constraints = pcbddc->local_primal_size-n_vertices; 185 /* Set Non-overlapping dimensions */ 186 n_B = pcis->n_B; n_D = pcis->n - n_B; 187 n_R = pcis->n-n_vertices; 188 189 /* Set types for local objects needed by BDDC precondtioner */ 190 impMatType = MATSEQDENSE; 191 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 192 193 /* Allocating some extra storage just to be safe */ 194 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 195 for (i=0;i<pcis->n;i++) auxindices[i]=i; 196 197 /* vertices in boundary numbering */ 198 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 199 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 200 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 202 if (i != n_vertices) { 203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 204 } 205 206 /* some work vectors on vertices and/or constraints */ 207 if (n_vertices) { 208 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 209 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 210 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 211 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 212 } 213 if (n_constraints) { 214 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 215 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 216 } 217 218 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 219 if (n_constraints) { 220 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 221 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 222 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 223 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 224 225 /* Extract constraints on R nodes: C_{CR} */ 226 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 227 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 228 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 229 230 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 231 for (i=0;i<n_constraints;i++) { 232 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 233 /* Get row of constraint matrix in R numbering */ 234 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 235 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 237 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 238 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 239 /* Solve for row of constraint matrix in R numbering */ 240 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 241 /* Set values in local_auxmat2 */ 242 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 243 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 245 } 246 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 249 250 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 251 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 252 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 253 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 254 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 255 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 256 ierr = MatSetUp(M1);CHKERRQ(ierr); 257 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 258 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 259 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 260 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 261 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 262 ierr = MatDestroy(&M2);CHKERRQ(ierr); 263 ierr = MatDestroy(&M3);CHKERRQ(ierr); 264 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 265 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 266 } 267 268 /* Get submatrices from subdomain matrix */ 269 if (n_vertices) { 270 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 271 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 273 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 274 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 275 } 276 277 /* Matrix of coarse basis functions (local) */ 278 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 279 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 280 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 281 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 282 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 283 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 284 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 285 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 286 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 287 } 288 289 if (pcbddc->dbg_flag) { 290 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 291 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 292 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 293 } 294 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 295 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 296 297 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 298 299 /* vertices */ 300 for (i=0;i<n_vertices;i++) { 301 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 302 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 303 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 304 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 305 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 306 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 307 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 308 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 309 if (n_constraints) { 310 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 311 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 313 } 314 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 315 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 316 317 /* Set values in coarse basis function and subdomain part of coarse_mat */ 318 /* coarse basis functions */ 319 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 320 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 322 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 323 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 325 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 326 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 327 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 328 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 329 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 330 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 331 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 332 } 333 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 334 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 335 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 337 if (n_constraints) { 338 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 339 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 341 } 342 343 /* check */ 344 if (pcbddc->dbg_flag) { 345 /* assemble subdomain vector on local nodes */ 346 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 350 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 351 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 352 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 353 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 354 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 355 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 356 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 357 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 358 if (n_constraints) { 359 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 360 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 362 } 363 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 364 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 365 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 366 /* check saddle point solution */ 367 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 368 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 369 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 370 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 371 /* shift by the identity matrix */ 372 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 376 } 377 } 378 379 /* constraints */ 380 for (i=0;i<n_constraints;i++) { 381 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 382 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 385 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 386 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 387 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 if (n_vertices) { 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 } 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 j = i+n_vertices; /* don't touch this! */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 if (n_vertices) { 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 } 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 if (n_vertices) { 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 } 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 449 } 450 } 451 /* call assembling routines for local coarse basis */ 452 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 455 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 } 458 459 /* compute other basis functions for non-symmetric problems */ 460 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 461 if (!setsym || (setsym && !issym)) { 462 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 463 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 464 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 465 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 466 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 467 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 468 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 470 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 471 } 472 for (i=0;i<pcbddc->local_primal_size;i++) { 473 if (n_constraints) { 474 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 475 for (j=0;j<n_constraints;j++) { 476 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 477 } 478 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 479 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 480 } 481 if (i<n_vertices) { 482 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 483 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 485 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 486 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 487 if (n_constraints) { 488 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 489 } 490 } else { 491 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 492 } 493 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 498 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 500 if (i<n_vertices) { 501 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 502 } 503 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 504 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 507 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 509 } 510 511 if (pcbddc->dbg_flag) { 512 /* assemble subdomain vector on nodes */ 513 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 514 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 515 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 517 if (i<n_vertices) { 518 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 519 } 520 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 522 /* assemble subdomain vector of lagrange multipliers */ 523 for (j=0;j<pcbddc->local_primal_size;j++) { 524 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 527 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 528 /* check saddle point solution */ 529 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 530 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 532 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 533 /* shift by the identity matrix */ 534 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 535 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 538 } 539 } 540 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 541 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 542 if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 543 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } 546 } 547 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 548 /* Checking coarse_sub_mat and coarse basis functios */ 549 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 550 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 551 if (pcbddc->dbg_flag) { 552 Mat coarse_sub_mat; 553 Mat AUXMAT,TM1,TM2,TM3,TM4; 554 Mat coarse_phi_D,coarse_phi_B; 555 Mat coarse_psi_D,coarse_psi_B; 556 Mat A_II,A_BB,A_IB,A_BI; 557 MatType checkmattype=MATSEQAIJ; 558 PetscReal real_value; 559 560 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 561 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 562 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 563 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 564 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 565 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 566 if (pcbddc->coarse_psi_B) { 567 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 569 } 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 571 572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 574 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 575 if (pcbddc->coarse_psi_B) { 576 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 577 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 579 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 580 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 582 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 583 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 585 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 586 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 588 } else { 589 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 590 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 } 598 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 599 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 600 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 601 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 602 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 608 for (i=0;i<pcbddc->local_primal_size;i++) { 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 610 } 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 612 for (i=0;i<pcbddc->local_primal_size;i++) { 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 614 } 615 if (pcbddc->coarse_psi_B) { 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 617 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 621 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 623 } 624 } 625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 626 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 627 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 628 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 629 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 630 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 631 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 632 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 633 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 634 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 635 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 636 if (pcbddc->coarse_psi_B) { 637 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 639 } 640 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 641 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 642 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 643 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 644 } 645 /* free memory */ 646 if (n_vertices) { 647 ierr = PetscFree(vertices);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 649 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 652 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 653 } 654 if (n_constraints) { 655 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 656 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 657 ierr = MatDestroy(&M1);CHKERRQ(ierr); 658 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 659 } 660 ierr = PetscFree(auxindices);CHKERRQ(ierr); 661 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 662 ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 663 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 670 { 671 PC_IS* pcis = (PC_IS*)(pc->data); 672 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 673 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 674 /* manage repeated solves */ 675 MatReuse reuse; 676 MatStructure matstruct; 677 PetscErrorCode ierr; 678 679 PetscFunctionBegin; 680 /* get mat flags */ 681 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 682 reuse = MAT_INITIAL_MATRIX; 683 if (pc->setupcalled) { 684 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 685 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 686 if (matstruct == SAME_NONZERO_PATTERN) { 687 reuse = MAT_REUSE_MATRIX; 688 } else { 689 reuse = MAT_INITIAL_MATRIX; 690 } 691 } 692 if (reuse == MAT_INITIAL_MATRIX) { 693 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 695 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 } 699 700 /* transform local matrices if needed */ 701 if (pcbddc->use_change_of_basis) { 702 Mat change_mat_all; 703 PetscScalar *row_cmat_values; 704 PetscInt *row_cmat_indices; 705 PetscInt *nnz,*is_indices,*temp_indices; 706 PetscInt i,j,k,n_D,n_B; 707 708 /* Get Non-overlapping dimensions */ 709 n_B = pcis->n_B; 710 n_D = pcis->n-n_B; 711 712 /* compute nonzero structure of change of basis on all local nodes */ 713 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 714 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 715 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 716 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 718 k=1; 719 for (i=0;i<n_B;i++) { 720 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 721 nnz[is_indices[i]]=j; 722 if (k < j) k = j; 723 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 724 } 725 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 /* assemble change of basis matrix on the whole set of local dofs */ 727 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 728 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 729 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 730 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 731 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 for (i=0;i<n_D;i++) { 734 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 735 } 736 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 738 for (i=0;i<n_B;i++) { 739 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 740 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 741 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 742 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 743 } 744 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 747 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 748 if (i==1) { 749 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 750 } else { 751 Mat work_mat; 752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 753 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 755 } 756 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 757 ierr = PetscFree(nnz);CHKERRQ(ierr); 758 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 759 } else { 760 /* without change of basis, the local matrix is unchanged */ 761 if (!pcbddc->local_mat) { 762 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 763 pcbddc->local_mat = matis->A; 764 } 765 } 766 767 /* get submatrices */ 768 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 769 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 770 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 778 { 779 PC_IS* pcis = (PC_IS*)(pc->data); 780 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 781 IS is_aux1,is_aux2; 782 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 783 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 784 PetscBool *array_bool; 785 PetscErrorCode ierr; 786 787 PetscFunctionBegin; 788 /* Set Non-overlapping dimensions */ 789 n_B = pcis->n_B; n_D = pcis->n - n_B; 790 /* get vertex indices from constraint matrix */ 791 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 792 /* Set number of constraints */ 793 n_constraints = pcbddc->local_primal_size-n_vertices; 794 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 795 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&array_bool);CHKERRQ(ierr); 796 for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_TRUE; 797 for (i=0;i<n_vertices;i++) array_bool[vertices[i]] = PETSC_FALSE; 798 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 799 for (i=0, n_R=0; i<pcis->n; i++) { 800 if (array_bool[i]) { 801 idx_R_local[n_R] = i; 802 n_R++; 803 } 804 } 805 ierr = PetscFree(vertices);CHKERRQ(ierr); 806 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 807 808 /* print some info if requested */ 809 if (pcbddc->dbg_flag) { 810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 811 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 817 } 818 819 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 820 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 821 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0; i<n_D; i++) array_bool[is_indices[i]] = PETSC_FALSE; 824 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0, j=0; i<n_R; i++) { 826 if (array_bool[idx_R_local[i]]) { 827 aux_array1[j] = i; 828 j++; 829 } 830 } 831 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 832 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 for (i=0, j=0; i<n_B; i++) { 834 if (array_bool[is_indices[i]]) { 835 aux_array2[j] = i; j++; 836 } 837 } 838 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 839 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 840 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 841 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 842 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 843 844 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 845 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 846 for (i=0, j=0; i<n_R; i++) { 847 if (!array_bool[idx_R_local[i]]) { 848 aux_array1[j] = i; 849 j++; 850 } 851 } 852 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 853 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 854 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 855 } 856 ierr = PetscFree(array_bool);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 862 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 863 { 864 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 865 866 PetscFunctionBegin; 867 pcbddc->use_exact_dirichlet=use; 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 873 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 874 { 875 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 876 PC_IS *pcis = (PC_IS*)pc->data; 877 PC pc_temp; 878 Mat A_RR; 879 Vec vec1,vec2,vec3; 880 MatStructure matstruct; 881 PetscScalar m_one = -1.0; 882 PetscReal value; 883 PetscInt n_D,n_R,use_exact,use_exact_reduced; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* Creating PC contexts for local Dirichlet and Neumann problems */ 888 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 889 890 /* DIRICHLET PROBLEM */ 891 /* Matrix for Dirichlet problem is pcis->A_II */ 892 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 893 if (!pcbddc->ksp_D) { /* create object if not yet build */ 894 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 895 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 896 /* default */ 897 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 898 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 899 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 900 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 901 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 902 } 903 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 904 /* Allow user's customization */ 905 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 906 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 907 if (!n_D) { 908 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 909 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 910 } 911 /* Set Up KSP for Dirichlet problem of BDDC */ 912 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 913 /* set ksp_D into pcis data */ 914 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 915 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 916 pcis->ksp_D = pcbddc->ksp_D; 917 918 /* NEUMANN PROBLEM */ 919 /* Matrix for Neumann problem is A_RR -> we need to create it */ 920 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 921 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 922 if (!pcbddc->ksp_R) { /* create object if not yet build */ 923 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 924 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 925 /* default */ 926 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 927 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 928 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 929 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 930 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 931 } 932 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 933 /* Allow user's customization */ 934 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 935 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 936 if (!n_R) { 937 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 938 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 939 } 940 /* Set Up KSP for Neumann problem of BDDC */ 941 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 942 943 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 944 945 /* Dirichlet */ 946 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 947 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 948 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 949 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 950 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 951 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 952 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 953 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 954 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 955 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 956 /* need to be adapted? */ 957 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 958 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 959 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 960 /* print info */ 961 if (pcbddc->dbg_flag) { 962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 963 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 } 968 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 969 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 970 } 971 972 /* Neumann */ 973 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 974 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 975 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 976 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 977 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 978 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 979 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 980 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 981 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 982 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 983 /* need to be adapted? */ 984 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 985 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 986 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 987 /* print info */ 988 if (pcbddc->dbg_flag) { 989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 991 } 992 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 993 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 994 } 995 996 /* free Neumann problem's matrix */ 997 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1003 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1004 { 1005 PetscErrorCode ierr; 1006 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1007 1008 PetscFunctionBegin; 1009 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1010 if (pcbddc->local_auxmat1) { 1011 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1012 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1019 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1020 { 1021 PetscErrorCode ierr; 1022 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1023 PC_IS* pcis = (PC_IS*) (pc->data); 1024 const PetscScalar zero = 0.0; 1025 1026 PetscFunctionBegin; 1027 /* Application of PHI^T (or PSI^T) */ 1028 if (pcbddc->coarse_psi_B) { 1029 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1030 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1031 } else { 1032 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1033 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1034 } 1035 /* Scatter data of coarse_rhs */ 1036 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1037 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 1039 /* Local solution on R nodes */ 1040 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1041 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1042 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1043 if (pcbddc->inexact_prec_type) { 1044 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1046 } 1047 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1048 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1049 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 if (pcbddc->inexact_prec_type) { 1052 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 } 1055 1056 /* Coarse solution */ 1057 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1059 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1060 } 1061 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1062 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1063 1064 /* Sum contributions from two levels */ 1065 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1066 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1072 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1073 { 1074 PetscErrorCode ierr; 1075 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1076 1077 PetscFunctionBegin; 1078 switch (pcbddc->coarse_communications_type) { 1079 case SCATTERS_BDDC: 1080 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1081 break; 1082 case GATHERS_BDDC: 1083 break; 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1090 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1091 { 1092 PetscErrorCode ierr; 1093 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1094 PetscScalar* array_to; 1095 PetscScalar* array_from; 1096 MPI_Comm comm; 1097 PetscInt i; 1098 1099 PetscFunctionBegin; 1100 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1101 switch (pcbddc->coarse_communications_type) { 1102 case SCATTERS_BDDC: 1103 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1104 break; 1105 case GATHERS_BDDC: 1106 if (vec_from) { 1107 ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); 1108 } 1109 if (vec_to) { 1110 ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); 1111 } 1112 switch(pcbddc->coarse_problem_type){ 1113 case SEQUENTIAL_BDDC: 1114 if (smode == SCATTER_FORWARD) { 1115 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1116 if (vec_to) { 1117 if (imode == ADD_VALUES) { 1118 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1119 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1120 } 1121 } else { 1122 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1123 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1124 } 1125 } 1126 } 1127 } else { 1128 if (vec_from) { 1129 if (imode == ADD_VALUES) { 1130 MPI_Comm vec_from_comm; 1131 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr); 1132 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type); 1133 } 1134 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1135 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1136 } 1137 } 1138 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1139 } 1140 break; 1141 case REPLICATED_BDDC: 1142 if (smode == SCATTER_FORWARD) { 1143 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1144 if (imode == ADD_VALUES) { 1145 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1146 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1147 } 1148 } else { 1149 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1150 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1151 } 1152 } 1153 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1154 if (imode == ADD_VALUES) { 1155 for (i=0;i<pcbddc->local_primal_size;i++) { 1156 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 1157 } 1158 } else { 1159 for (i=0;i<pcbddc->local_primal_size;i++) { 1160 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1161 } 1162 } 1163 } 1164 break; 1165 case MULTILEVEL_BDDC: 1166 break; 1167 case PARALLEL_BDDC: 1168 break; 1169 } 1170 if (vec_from) { 1171 ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); 1172 } 1173 if (vec_to) { 1174 ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); 1175 } 1176 break; 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 /* uncomment for testing purposes */ 1182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1186 { 1187 PetscErrorCode ierr; 1188 PC_IS* pcis = (PC_IS*)(pc->data); 1189 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1191 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1192 MatType impMatType=MATSEQAIJ; 1193 /* one and zero */ 1194 PetscScalar one=1.0,zero=0.0; 1195 /* space to store constraints and their local indices */ 1196 PetscScalar *temp_quadrature_constraint; 1197 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1198 /* iterators */ 1199 PetscInt i,j,k,total_counts,temp_start_ptr; 1200 /* stuff to store connected components stored in pcbddc->mat_graph */ 1201 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1202 PetscInt n_ISForFaces,n_ISForEdges; 1203 PetscBool get_faces,get_edges,get_vertices; 1204 /* near null space stuff */ 1205 MatNullSpace nearnullsp; 1206 const Vec *nearnullvecs; 1207 Vec *localnearnullsp; 1208 PetscBool nnsp_has_cnst; 1209 PetscInt nnsp_size; 1210 PetscScalar *array; 1211 /* BLAS integers */ 1212 PetscBLASInt lwork,lierr; 1213 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1214 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1215 /* LAPACK working arrays for SVD or POD */ 1216 PetscBool skip_lapack; 1217 PetscScalar *work; 1218 PetscReal *singular_vals; 1219 #if defined(PETSC_USE_COMPLEX) 1220 PetscReal *rwork; 1221 #endif 1222 #if defined(PETSC_MISSING_LAPACK_GESVD) 1223 PetscBLASInt Blas_one_2=1; 1224 PetscScalar *temp_basis,*correlation_mat; 1225 #else 1226 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1227 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1228 #endif 1229 /* change of basis */ 1230 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1231 PetscBool boolforchange,*change_basis,*touched; 1232 /* auxiliary stuff */ 1233 PetscInt *nnz,*is_indices,*local_to_B; 1234 /* some quantities */ 1235 PetscInt n_vertices,total_primal_vertices; 1236 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1237 1238 1239 PetscFunctionBegin; 1240 /* Get index sets for faces, edges and vertices from graph */ 1241 get_faces = PETSC_TRUE; 1242 get_edges = PETSC_TRUE; 1243 get_vertices = PETSC_TRUE; 1244 if (pcbddc->vertices_flag) { 1245 get_faces = PETSC_FALSE; 1246 get_edges = PETSC_FALSE; 1247 } 1248 if (pcbddc->constraints_flag) { 1249 get_vertices = PETSC_FALSE; 1250 } 1251 if (pcbddc->faces_flag) { 1252 get_edges = PETSC_FALSE; 1253 } 1254 if (pcbddc->edges_flag) { 1255 get_faces = PETSC_FALSE; 1256 } 1257 /* default */ 1258 if (!get_faces && !get_edges && !get_vertices) { 1259 get_vertices = PETSC_TRUE; 1260 } 1261 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1262 /* print some info */ 1263 if (pcbddc->dbg_flag) { 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1265 i = 0; 1266 if (ISForVertices) { 1267 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1268 } 1269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1273 } 1274 /* check if near null space is attached to global mat */ 1275 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1276 if (nearnullsp) { 1277 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1278 } else { /* if near null space is not provided BDDC uses constants by default */ 1279 nnsp_size = 0; 1280 nnsp_has_cnst = PETSC_TRUE; 1281 } 1282 /* get max number of constraints on a single cc */ 1283 max_constraints = nnsp_size; 1284 if (nnsp_has_cnst) max_constraints++; 1285 1286 /* 1287 Evaluate maximum storage size needed by the procedure 1288 - temp_indices will contain start index of each constraint stored as follows 1289 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1290 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1291 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1292 */ 1293 total_counts = n_ISForFaces+n_ISForEdges; 1294 total_counts *= max_constraints; 1295 n_vertices = 0; 1296 if (ISForVertices) { 1297 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1298 } 1299 total_counts += n_vertices; 1300 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1301 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1302 total_counts = 0; 1303 max_size_of_constraint = 0; 1304 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1305 if (i<n_ISForEdges) { 1306 used_IS = &ISForEdges[i]; 1307 } else { 1308 used_IS = &ISForFaces[i-n_ISForEdges]; 1309 } 1310 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1311 total_counts += j; 1312 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1313 } 1314 total_counts *= max_constraints; 1315 total_counts += n_vertices; 1316 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1317 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1318 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1319 /* local to boundary numbering */ 1320 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1321 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1322 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1323 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1324 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 /* get local part of global near null space vectors */ 1326 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1327 for (k=0;k<nnsp_size;k++) { 1328 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1329 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1331 } 1332 1333 /* whether or not to skip lapack calls */ 1334 skip_lapack = PETSC_TRUE; 1335 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1336 1337 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1338 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1339 PetscScalar temp_work; 1340 #if defined(PETSC_MISSING_LAPACK_GESVD) 1341 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1342 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1343 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1344 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1345 #if defined(PETSC_USE_COMPLEX) 1346 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1347 #endif 1348 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1349 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1350 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1351 lwork = -1; 1352 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1353 #if !defined(PETSC_USE_COMPLEX) 1354 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1355 #else 1356 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1357 #endif 1358 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1359 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1360 #else /* on missing GESVD */ 1361 /* SVD */ 1362 PetscInt max_n,min_n; 1363 max_n = max_size_of_constraint; 1364 min_n = max_constraints; 1365 if (max_size_of_constraint < max_constraints) { 1366 min_n = max_size_of_constraint; 1367 max_n = max_constraints; 1368 } 1369 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1370 #if defined(PETSC_USE_COMPLEX) 1371 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1372 #endif 1373 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1374 lwork = -1; 1375 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1376 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1377 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1378 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1379 #if !defined(PETSC_USE_COMPLEX) 1380 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1381 #else 1382 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1383 #endif 1384 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1385 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1386 #endif /* on missing GESVD */ 1387 /* Allocate optimal workspace */ 1388 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1389 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1390 } 1391 /* Now we can loop on constraining sets */ 1392 total_counts = 0; 1393 temp_indices[0] = 0; 1394 /* vertices */ 1395 if (ISForVertices) { 1396 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1397 if (nnsp_has_cnst) { /* consider all vertices */ 1398 for (i=0;i<n_vertices;i++) { 1399 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1400 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1401 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1402 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1403 change_basis[total_counts]=PETSC_FALSE; 1404 total_counts++; 1405 } 1406 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1407 PetscBool used_vertex; 1408 for (i=0;i<n_vertices;i++) { 1409 used_vertex = PETSC_FALSE; 1410 k = 0; 1411 while (!used_vertex && k<nnsp_size) { 1412 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1413 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1414 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1415 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1416 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1417 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1418 change_basis[total_counts]=PETSC_FALSE; 1419 total_counts++; 1420 used_vertex = PETSC_TRUE; 1421 } 1422 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1423 k++; 1424 } 1425 } 1426 } 1427 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1428 n_vertices = total_counts; 1429 } 1430 1431 /* edges and faces */ 1432 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1433 if (i<n_ISForEdges) { 1434 used_IS = &ISForEdges[i]; 1435 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1436 } else { 1437 used_IS = &ISForFaces[i-n_ISForEdges]; 1438 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1439 } 1440 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1441 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1442 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1443 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 /* change of basis should not be performed on local periodic nodes */ 1445 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1446 if (nnsp_has_cnst) { 1447 PetscScalar quad_value; 1448 temp_constraints++; 1449 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1450 for (j=0;j<size_of_constraint;j++) { 1451 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1452 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1453 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1454 } 1455 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1456 change_basis[total_counts]=boolforchange; 1457 total_counts++; 1458 } 1459 for (k=0;k<nnsp_size;k++) { 1460 PetscReal real_value; 1461 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1462 for (j=0;j<size_of_constraint;j++) { 1463 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1464 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1465 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1466 } 1467 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1468 /* check if array is null on the connected component */ 1469 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1470 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1471 if (real_value > 0.0) { /* keep indices and values */ 1472 temp_constraints++; 1473 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1474 change_basis[total_counts]=boolforchange; 1475 total_counts++; 1476 } 1477 } 1478 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1479 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1480 if (!pcbddc->use_nnsp_true) { 1481 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1482 1483 #if defined(PETSC_MISSING_LAPACK_GESVD) 1484 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1485 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1486 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1487 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1488 from that computed using LAPACKgesvd 1489 -> This is due to a different computation of eigenvectors in LAPACKheev 1490 -> The quality of the POD-computed basis will be the same */ 1491 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1492 /* Store upper triangular part of correlation matrix */ 1493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1494 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1495 for (j=0;j<temp_constraints;j++) { 1496 for (k=0;k<j+1;k++) { 1497 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1498 } 1499 } 1500 /* compute eigenvalues and eigenvectors of correlation matrix */ 1501 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1502 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1503 #if !defined(PETSC_USE_COMPLEX) 1504 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1505 #else 1506 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1507 #endif 1508 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1510 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1511 j=0; 1512 while (j < temp_constraints && singular_vals[j] < tol) j++; 1513 total_counts=total_counts-j; 1514 /* scale and copy POD basis into used quadrature memory */ 1515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1516 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1517 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1519 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1521 if (j<temp_constraints) { 1522 PetscInt ii; 1523 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1524 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1525 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1527 for (k=0;k<temp_constraints-j;k++) { 1528 for (ii=0;ii<size_of_constraint;ii++) { 1529 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1530 } 1531 } 1532 } 1533 #else /* on missing GESVD */ 1534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1535 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1538 #if !defined(PETSC_USE_COMPLEX) 1539 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1540 #else 1541 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1542 #endif 1543 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1545 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1546 k = temp_constraints; 1547 if (k > size_of_constraint) k = size_of_constraint; 1548 j = 0; 1549 while (j < k && singular_vals[k-j-1] < tol) j++; 1550 total_counts = total_counts-temp_constraints+k-j; 1551 #endif /* on missing GESVD */ 1552 } 1553 } 1554 /* free index sets of faces, edges and vertices */ 1555 for (i=0;i<n_ISForFaces;i++) { 1556 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1559 for (i=0;i<n_ISForEdges;i++) { 1560 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1563 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1564 1565 /* free workspace */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 ierr = PetscFree(work);CHKERRQ(ierr); 1568 #if defined(PETSC_USE_COMPLEX) 1569 ierr = PetscFree(rwork);CHKERRQ(ierr); 1570 #endif 1571 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1572 #if defined(PETSC_MISSING_LAPACK_GESVD) 1573 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1574 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1575 #endif 1576 } 1577 for (k=0;k<nnsp_size;k++) { 1578 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1579 } 1580 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1581 1582 /* set quantities in pcbddc data structure */ 1583 /* n_vertices defines the number of subdomain corners in the primal space */ 1584 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1585 pcbddc->local_primal_size = total_counts; 1586 pcbddc->n_vertices = n_vertices; 1587 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1588 1589 /* Create constraint matrix */ 1590 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1591 /* so we need to set it up properly either with or without change of basis */ 1592 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1593 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1594 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1595 /* array to compute a local numbering of constraints : vertices first then constraints */ 1596 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1597 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1598 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1599 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1600 /* auxiliary stuff for basis change */ 1601 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1602 ierr = PetscMalloc(pcis->n_B*sizeof(PetscBool),&touched);CHKERRQ(ierr); 1603 ierr = PetscMemzero(touched,pcis->n_B*sizeof(PetscBool));CHKERRQ(ierr); 1604 1605 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1606 total_primal_vertices=0; 1607 for (i=0;i<pcbddc->local_primal_size;i++) { 1608 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1609 if (size_of_constraint == 1) { 1610 touched[temp_indices_to_constraint_B[temp_indices[i]]]=PETSC_TRUE; 1611 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1612 aux_primal_minloc[total_primal_vertices]=0; 1613 total_primal_vertices++; 1614 } else if (change_basis[i]) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1615 PetscInt min_loc,min_index; 1616 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1617 /* find first untouched local node */ 1618 k = 0; 1619 while (touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) k++; 1620 min_index = global_indices[k]; 1621 min_loc = k; 1622 /* search the minimum among global nodes already untouched on the cc */ 1623 for (k=1;k<size_of_constraint;k++) { 1624 /* there can be more than one constraint on a single connected component */ 1625 if (min_index > global_indices[k] && !touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) { 1626 min_index = global_indices[k]; 1627 min_loc = k; 1628 } 1629 } 1630 touched[temp_indices_to_constraint_B[temp_indices[i]+min_loc]] = PETSC_TRUE; 1631 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1632 aux_primal_minloc[total_primal_vertices]=min_loc; 1633 total_primal_vertices++; 1634 } 1635 } 1636 /* free workspace */ 1637 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1638 ierr = PetscFree(touched);CHKERRQ(ierr); 1639 /* permute indices in order to have a sorted set of vertices */ 1640 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1641 1642 /* nonzero structure of constraint matrix */ 1643 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1644 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1645 j=total_primal_vertices; 1646 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1647 if (!change_basis[i]) { 1648 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1649 j++; 1650 } 1651 } 1652 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1653 ierr = PetscFree(nnz);CHKERRQ(ierr); 1654 /* set values in constraint matrix */ 1655 for (i=0;i<total_primal_vertices;i++) { 1656 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 total_counts = total_primal_vertices; 1659 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1660 if (!change_basis[i]) { 1661 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1662 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1663 total_counts++; 1664 } 1665 } 1666 /* assembling */ 1667 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 /* 1670 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1671 */ 1672 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1673 if (pcbddc->use_change_of_basis) { 1674 PetscBool qr_needed = PETSC_FALSE; 1675 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1676 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1677 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1678 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1679 /* work arrays */ 1680 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1681 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1682 /* nonzeros per row */ 1683 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1684 if (change_basis[i]) { 1685 qr_needed = PETSC_TRUE; 1686 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1687 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1688 } 1689 } 1690 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1691 ierr = PetscFree(nnz);CHKERRQ(ierr); 1692 /* Set initial identity in the matrix */ 1693 for (i=0;i<pcis->n_B;i++) { 1694 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1695 } 1696 1697 /* Now we loop on the constraints which need a change of basis */ 1698 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1699 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1700 /* Change of basis matrix T computed via QR decomposition of constraints */ 1701 if (qr_needed) { 1702 /* dual and primal dofs on a single cc */ 1703 PetscInt dual_dofs,primal_dofs; 1704 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1705 PetscInt primal_counter; 1706 /* working stuff for GEQRF */ 1707 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1708 PetscBLASInt lqr_work; 1709 /* working stuff for UNGQR */ 1710 PetscScalar *gqr_work,lgqr_work_t; 1711 PetscBLASInt lgqr_work; 1712 /* working stuff for TRTRS */ 1713 PetscScalar *trs_rhs; 1714 PetscBLASInt Blas_NRHS; 1715 /* pointers for values insertion into change of basis matrix */ 1716 PetscInt *start_rows,*start_cols; 1717 PetscScalar *start_vals; 1718 /* working stuff for values insertion */ 1719 PetscBool *is_primal; 1720 1721 /* space to store Q */ 1722 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1723 /* first we issue queries for optimal work */ 1724 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1725 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1726 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1727 lqr_work = -1; 1728 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1730 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1731 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1732 lgqr_work = -1; 1733 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1734 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1735 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1736 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1737 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1738 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1740 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1741 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1742 /* array to store scaling factors for reflectors */ 1743 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1744 /* array to store rhs and solution of triangular solver */ 1745 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1746 /* array to store whether a node is primal or not */ 1747 ierr = PetscMalloc(pcis->n_B*sizeof(*is_primal),&is_primal);CHKERRQ(ierr); 1748 ierr = PetscMemzero(is_primal,pcis->n_B*sizeof(*is_primal));CHKERRQ(ierr); 1749 for (i=0;i<total_primal_vertices;i++) is_primal[local_to_B[aux_primal_numbering[i]]] = PETSC_TRUE; 1750 1751 /* allocating workspace for check */ 1752 if (pcbddc->dbg_flag) { 1753 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1754 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1755 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1756 } 1757 1758 /* loop on constraints and see whether or not they need a change of basis */ 1759 /* -> using implicit ordering contained in temp_indices data */ 1760 total_counts = pcbddc->n_vertices; 1761 primal_counter = total_counts; 1762 while (total_counts<pcbddc->local_primal_size) { 1763 primal_dofs = 1; 1764 if (change_basis[total_counts]) { 1765 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1766 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1767 primal_dofs++; 1768 } 1769 /* get constraint info */ 1770 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1771 dual_dofs = size_of_constraint-primal_dofs; 1772 1773 /* copy quadrature constraints for change of basis check */ 1774 if (pcbddc->dbg_flag) { 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1776 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1777 } 1778 1779 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1780 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1781 1782 /* compute QR decomposition of constraints */ 1783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1784 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1786 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1787 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1790 1791 /* explictly compute R^-T */ 1792 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1793 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1794 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1797 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1798 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1799 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1800 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1801 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1802 1803 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1805 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1806 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1807 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1808 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1809 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1812 1813 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1814 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1815 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1817 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1818 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1820 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1822 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1823 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1824 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1825 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1826 1827 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1828 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1829 /* insert cols for primal dofs */ 1830 for (j=0;j<primal_dofs;j++) { 1831 start_vals = &qr_basis[j*size_of_constraint]; 1832 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1833 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1834 } 1835 /* insert cols for dual dofs */ 1836 for (j=0,k=0;j<dual_dofs;k++) { 1837 if (!is_primal[temp_indices_to_constraint_B[temp_indices[total_counts]+k]]) { 1838 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1839 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1840 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1841 j++; 1842 } 1843 } 1844 1845 /* check change of basis */ 1846 if (pcbddc->dbg_flag) { 1847 PetscInt ii,jj; 1848 PetscBool valid_qr=PETSC_TRUE; 1849 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1856 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1858 for (jj=0;jj<size_of_constraint;jj++) { 1859 for (ii=0;ii<primal_dofs;ii++) { 1860 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1861 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1862 } 1863 } 1864 if (!valid_qr) { 1865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1866 for (jj=0;jj<size_of_constraint;jj++) { 1867 for (ii=0;ii<primal_dofs;ii++) { 1868 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1869 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1870 } 1871 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1872 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1873 } 1874 } 1875 } 1876 } else { 1877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1878 } 1879 } 1880 /* increment primal counter */ 1881 primal_counter += primal_dofs; 1882 } else { 1883 if (pcbddc->dbg_flag) { 1884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1885 } 1886 } 1887 /* increment constraint counter total_counts */ 1888 total_counts += primal_dofs; 1889 } 1890 if (pcbddc->dbg_flag) { 1891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1892 ierr = PetscFree(work);CHKERRQ(ierr); 1893 } 1894 /* free workspace */ 1895 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1896 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1897 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1898 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1899 ierr = PetscFree(is_primal);CHKERRQ(ierr); 1900 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1901 } 1902 /* assembling */ 1903 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 /* 1906 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1907 */ 1908 } 1909 /* free workspace */ 1910 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1911 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1912 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1913 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1914 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1915 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1916 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1917 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1923 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1924 { 1925 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1926 PC_IS *pcis = (PC_IS*)pc->data; 1927 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1928 PetscInt bs,ierr,i,vertex_size; 1929 PetscViewer viewer=pcbddc->dbg_viewer; 1930 1931 PetscFunctionBegin; 1932 /* Init local Graph struct */ 1933 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1934 1935 /* Check validity of the csr graph passed in by the user */ 1936 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1937 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1938 } 1939 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1940 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1941 Mat mat_adj; 1942 const PetscInt *xadj,*adjncy; 1943 PetscBool flg_row=PETSC_TRUE; 1944 1945 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1946 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1947 if (!flg_row) { 1948 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1949 } 1950 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1951 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1952 if (!flg_row) { 1953 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1954 } 1955 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1956 } 1957 1958 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1959 vertex_size = 1; 1960 if (!pcbddc->n_ISForDofs) { 1961 IS *custom_ISForDofs; 1962 1963 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1964 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1965 for (i=0;i<bs;i++) { 1966 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1967 } 1968 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1969 /* remove my references to IS objects */ 1970 for (i=0;i<bs;i++) { 1971 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1972 } 1973 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1974 } else { /* mat block size as vertex size (used for elasticity) */ 1975 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1976 } 1977 1978 /* Setup of Graph */ 1979 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1980 1981 /* Graph's connected components analysis */ 1982 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1983 1984 /* print some info to stdout */ 1985 if (pcbddc->dbg_flag) { 1986 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1993 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1994 { 1995 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1996 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 n = 0; 2001 vertices = 0; 2002 if (pcbddc->ConstraintMatrix) { 2003 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2004 for (i=0;i<local_primal_size;i++) { 2005 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2006 if (size_of_constraint == 1) n++; 2007 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2008 } 2009 if (vertices_idx) { 2010 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2011 n = 0; 2012 for (i=0;i<local_primal_size;i++) { 2013 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2014 if (size_of_constraint == 1) { 2015 vertices[n++]=row_cmat_indices[0]; 2016 } 2017 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2018 } 2019 } 2020 } 2021 *n_vertices = n; 2022 if (vertices_idx) *vertices_idx = vertices; 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2028 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 2029 { 2030 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2031 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2032 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2033 PetscBool *touched; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 n = 0; 2038 constraints_index = 0; 2039 if (pcbddc->ConstraintMatrix) { 2040 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2041 max_size_of_constraint = 0; 2042 for (i=0;i<local_primal_size;i++) { 2043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2044 if (size_of_constraint > 1) { 2045 n++; 2046 } 2047 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2048 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2049 } 2050 if (constraints_idx) { 2051 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2052 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2053 ierr = PetscMalloc(local_size*sizeof(PetscBool),&touched);CHKERRQ(ierr); 2054 ierr = PetscMemzero(touched,local_size*sizeof(PetscBool));CHKERRQ(ierr); 2055 n = 0; 2056 for (i=0;i<local_primal_size;i++) { 2057 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2058 if (size_of_constraint > 1) { 2059 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2060 /* find first untouched local node */ 2061 j = 0; 2062 while(touched[row_cmat_indices[j]]) j++; 2063 min_index = row_cmat_global_indices[j]; 2064 min_loc = j; 2065 /* search the minimum among nodes not yet touched on the connected component 2066 since there can be more than one constraint on a single cc */ 2067 for (j=1;j<size_of_constraint;j++) { 2068 if (min_index > row_cmat_global_indices[j] && !touched[row_cmat_indices[j]]) { 2069 min_index = row_cmat_global_indices[j]; 2070 min_loc = j; 2071 } 2072 } 2073 touched[row_cmat_indices[min_loc]] = PETSC_TRUE; 2074 constraints_index[n++] = row_cmat_indices[min_loc]; 2075 } 2076 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(touched);CHKERRQ(ierr); 2079 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2080 } 2081 } 2082 *n_constraints = n; 2083 if (constraints_idx) *constraints_idx = constraints_index; 2084 PetscFunctionReturn(0); 2085 } 2086 2087 /* the next two functions has been adapted from pcis.c */ 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "PCBDDCApplySchur" 2090 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2091 { 2092 PetscErrorCode ierr; 2093 PC_IS *pcis = (PC_IS*)(pc->data); 2094 2095 PetscFunctionBegin; 2096 if (!vec2_B) { vec2_B = v; } 2097 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2098 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2099 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2100 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2101 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2107 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2108 { 2109 PetscErrorCode ierr; 2110 PC_IS *pcis = (PC_IS*)(pc->data); 2111 2112 PetscFunctionBegin; 2113 if (!vec2_B) { vec2_B = v; } 2114 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2115 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2116 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2117 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2118 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "PCBDDCSubsetNumbering" 2124 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2125 { 2126 Vec local_vec,global_vec; 2127 IS seqis,paris; 2128 VecScatter scatter_ctx; 2129 PetscScalar *array; 2130 PetscInt *temp_global_dofs; 2131 PetscScalar globalsum; 2132 PetscInt i,j,s; 2133 PetscInt nlocals,first_index,old_index,max_local; 2134 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2135 PetscMPIInt *dof_sizes,*dof_displs; 2136 PetscBool first_found; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 /* mpi buffers */ 2141 MPI_Comm_size(comm,&size_prec_comm); 2142 MPI_Comm_rank(comm,&rank_prec_comm); 2143 j = ( !rank_prec_comm ? size_prec_comm : 0); 2144 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2145 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2146 /* get maximum size of subset */ 2147 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2148 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2149 max_local = 0; 2150 if (n_local_dofs) { 2151 max_local = temp_global_dofs[0]; 2152 for (i=1;i<n_local_dofs;i++) { 2153 if (max_local < temp_global_dofs[i] ) { 2154 max_local = temp_global_dofs[i]; 2155 } 2156 } 2157 } 2158 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2159 max_global++; 2160 max_local = 0; 2161 if (n_local_dofs) { 2162 max_local = local_dofs[0]; 2163 for (i=1;i<n_local_dofs;i++) { 2164 if (max_local < local_dofs[i] ) { 2165 max_local = local_dofs[i]; 2166 } 2167 } 2168 } 2169 max_local++; 2170 /* allocate workspace */ 2171 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2172 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2173 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2174 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2175 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2176 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2177 /* create scatter */ 2178 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2179 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2180 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2181 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2182 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2183 /* init array */ 2184 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2185 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2186 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2187 if (local_dofs_mult) { 2188 for (i=0;i<n_local_dofs;i++) { 2189 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2190 } 2191 } else { 2192 for (i=0;i<n_local_dofs;i++) { 2193 array[local_dofs[i]]=1.0; 2194 } 2195 } 2196 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2197 /* scatter into global vec and get total number of global dofs */ 2198 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2199 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2200 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2201 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2202 /* Fill global_vec with cumulative function for global numbering */ 2203 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2204 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2205 nlocals = 0; 2206 first_index = -1; 2207 first_found = PETSC_FALSE; 2208 for (i=0;i<s;i++) { 2209 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2210 first_found = PETSC_TRUE; 2211 first_index = i; 2212 } 2213 nlocals += (PetscInt)PetscRealPart(array[i]); 2214 } 2215 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2216 if (!rank_prec_comm) { 2217 dof_displs[0]=0; 2218 for (i=1;i<size_prec_comm;i++) { 2219 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2220 } 2221 } 2222 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2223 if (first_found) { 2224 array[first_index] += (PetscScalar)nlocals; 2225 old_index = first_index; 2226 for (i=first_index+1;i<s;i++) { 2227 if (PetscRealPart(array[i]) > 0.0) { 2228 array[i] += array[old_index]; 2229 old_index = i; 2230 } 2231 } 2232 } 2233 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2234 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2235 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2236 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2237 /* get global ordering of local dofs */ 2238 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2239 if (local_dofs_mult) { 2240 for (i=0;i<n_local_dofs;i++) { 2241 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2242 } 2243 } else { 2244 for (i=0;i<n_local_dofs;i++) { 2245 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2246 } 2247 } 2248 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2249 /* free workspace */ 2250 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2251 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2252 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2253 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2254 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2255 /* return pointer to global ordering of local dofs */ 2256 *global_numbering_subset = temp_global_dofs; 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2262 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2263 { 2264 PetscInt i,j; 2265 PetscScalar *alphas; 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 /* this implements stabilized Gram-Schmidt */ 2270 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2271 for (i=0;i<n;i++) { 2272 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2273 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2274 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2275 } 2276 ierr = PetscFree(alphas);CHKERRQ(ierr); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 /* BDDC requires metis 5.0.1 for multilevel */ 2281 #if defined(PETSC_HAVE_METIS) 2282 #include "metis.h" 2283 #define MetisInt idx_t 2284 #define MetisScalar real_t 2285 #endif 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment" 2289 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2290 { 2291 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2292 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2293 PC_IS *pcis = (PC_IS*)pc->data; 2294 MPI_Comm prec_comm; 2295 MPI_Comm coarse_comm; 2296 2297 PetscInt coarse_size; 2298 2299 MatNullSpace CoarseNullSpace; 2300 2301 /* common to all choiches */ 2302 PetscScalar *temp_coarse_mat_vals; 2303 PetscScalar *ins_coarse_mat_vals; 2304 PetscInt *ins_local_primal_indices; 2305 PetscMPIInt *localsizes2,*localdispl2; 2306 PetscMPIInt size_prec_comm; 2307 PetscMPIInt rank_prec_comm; 2308 PetscMPIInt active_rank=MPI_PROC_NULL; 2309 PetscMPIInt master_proc=0; 2310 PetscInt ins_local_primal_size; 2311 /* specific to MULTILEVEL_BDDC */ 2312 PetscMPIInt *ranks_recv=0; 2313 PetscMPIInt count_recv=0; 2314 PetscMPIInt rank_coarse_proc_send_to=-1; 2315 PetscMPIInt coarse_color = MPI_UNDEFINED; 2316 ISLocalToGlobalMapping coarse_ISLG; 2317 /* some other variables */ 2318 PetscErrorCode ierr; 2319 MatType coarse_mat_type; 2320 PCType coarse_pc_type; 2321 KSPType coarse_ksp_type; 2322 PC pc_temp; 2323 PetscInt i,j,k; 2324 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2325 /* verbose output viewer */ 2326 PetscViewer viewer=pcbddc->dbg_viewer; 2327 PetscInt dbg_flag=pcbddc->dbg_flag; 2328 2329 PetscInt offset,offset2; 2330 PetscMPIInt im_active,active_procs; 2331 PetscInt *dnz,*onz; 2332 2333 PetscBool setsym,issym=PETSC_FALSE; 2334 2335 PetscFunctionBegin; 2336 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2337 ins_local_primal_indices = 0; 2338 ins_coarse_mat_vals = 0; 2339 localsizes2 = 0; 2340 localdispl2 = 0; 2341 temp_coarse_mat_vals = 0; 2342 coarse_ISLG = 0; 2343 2344 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2345 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2346 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2347 2348 /* Assign global numbering to coarse dofs */ 2349 { 2350 PetscInt *auxlocal_primal,*aux_idx; 2351 PetscMPIInt mpi_local_primal_size; 2352 PetscScalar coarsesum,*array; 2353 2354 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2355 2356 /* Construct needed data structures for message passing */ 2357 j = 0; 2358 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2359 j = size_prec_comm; 2360 } 2361 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2362 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2363 /* Gather local_primal_size information for all processes */ 2364 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2365 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2366 } else { 2367 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2368 } 2369 pcbddc->replicated_primal_size = 0; 2370 for (i=0; i<j; i++) { 2371 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2372 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2373 } 2374 2375 /* This code fragment assumes that the number of local constraints per connected component 2376 is not greater than the number of nodes defined for the connected component 2377 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2378 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2379 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2380 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2381 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2382 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2383 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2384 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2385 /* Compute number of coarse dofs */ 2386 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2387 2388 if (dbg_flag) { 2389 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2390 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2391 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2392 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2393 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2394 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2395 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2396 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2397 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2398 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2399 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2400 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2401 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2402 for (i=0;i<pcis->n;i++) { 2403 if (array[i] == 1.0) { 2404 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2405 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2406 } 2407 } 2408 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2409 for (i=0;i<pcis->n;i++) { 2410 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2411 } 2412 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2413 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2414 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2415 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2416 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2417 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2419 } 2420 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2421 } 2422 2423 if (dbg_flag) { 2424 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); 2425 if (dbg_flag > 1) { 2426 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2427 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2429 for (i=0;i<pcbddc->local_primal_size;i++) { 2430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2431 } 2432 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2433 } 2434 } 2435 2436 im_active = 0; 2437 if (pcis->n) im_active = 1; 2438 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2439 2440 /* adapt coarse problem type */ 2441 #if defined(PETSC_HAVE_METIS) 2442 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2443 if (pcbddc->current_level < pcbddc->max_levels) { 2444 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2445 if (dbg_flag) { 2446 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2447 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2448 } 2449 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2450 } 2451 } else { 2452 if (dbg_flag) { 2453 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2454 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2455 } 2456 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2457 } 2458 } 2459 #else 2460 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2461 #endif 2462 2463 switch(pcbddc->coarse_problem_type){ 2464 2465 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2466 { 2467 #if defined(PETSC_HAVE_METIS) 2468 /* we need additional variables */ 2469 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2470 MetisInt *metis_coarse_subdivision; 2471 MetisInt options[METIS_NOPTIONS]; 2472 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2473 PetscMPIInt procs_jumps_coarse_comm; 2474 PetscMPIInt *coarse_subdivision; 2475 PetscMPIInt *total_count_recv; 2476 PetscMPIInt *total_ranks_recv; 2477 PetscMPIInt *displacements_recv; 2478 PetscMPIInt *my_faces_connectivity; 2479 PetscMPIInt *petsc_faces_adjncy; 2480 MetisInt *faces_adjncy; 2481 MetisInt *faces_xadj; 2482 PetscMPIInt *number_of_faces; 2483 PetscMPIInt *faces_displacements; 2484 PetscInt *array_int; 2485 PetscMPIInt my_faces=0; 2486 PetscMPIInt total_faces=0; 2487 PetscInt ranks_stretching_ratio; 2488 2489 /* define some quantities */ 2490 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2491 coarse_mat_type = MATIS; 2492 coarse_pc_type = PCBDDC; 2493 coarse_ksp_type = KSPRICHARDSON; 2494 2495 /* details of coarse decomposition */ 2496 n_subdomains = active_procs; 2497 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2498 ranks_stretching_ratio = size_prec_comm/active_procs; 2499 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2500 2501 #if 0 2502 PetscMPIInt *old_ranks; 2503 PetscInt *new_ranks,*jj,*ii; 2504 MatPartitioning mat_part; 2505 IS coarse_new_decomposition,is_numbering; 2506 PetscViewer viewer_test; 2507 MPI_Comm test_coarse_comm; 2508 PetscMPIInt test_coarse_color; 2509 Mat mat_adj; 2510 /* Create new communicator for coarse problem splitting the old one */ 2511 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2512 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2513 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2514 test_coarse_comm = MPI_COMM_NULL; 2515 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2516 if (im_active) { 2517 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2518 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2519 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2520 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2521 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2522 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2523 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2524 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2525 k = pcis->n_neigh-1; 2526 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2527 ii[0]=0; 2528 ii[1]=k; 2529 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2530 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2531 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2532 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2533 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2534 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2535 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2536 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2537 printf("Setting Nparts %d\n",n_parts); 2538 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2539 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2540 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2541 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2542 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2543 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2544 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2545 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2546 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2547 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2548 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2549 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2550 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2551 } 2552 #endif 2553 2554 /* build CSR graph of subdomains' connectivity */ 2555 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2556 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2557 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2558 for (j=0;j<pcis->n_shared[i];j++){ 2559 array_int[ pcis->shared[i][j] ]+=1; 2560 } 2561 } 2562 for (i=1;i<pcis->n_neigh;i++){ 2563 for (j=0;j<pcis->n_shared[i];j++){ 2564 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2565 my_faces++; 2566 break; 2567 } 2568 } 2569 } 2570 2571 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2572 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2573 my_faces=0; 2574 for (i=1;i<pcis->n_neigh;i++){ 2575 for (j=0;j<pcis->n_shared[i];j++){ 2576 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2577 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2578 my_faces++; 2579 break; 2580 } 2581 } 2582 } 2583 if (rank_prec_comm == master_proc) { 2584 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2585 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2586 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2587 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2588 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2589 } 2590 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2591 if (rank_prec_comm == master_proc) { 2592 faces_xadj[0]=0; 2593 faces_displacements[0]=0; 2594 j=0; 2595 for (i=1;i<size_prec_comm+1;i++) { 2596 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2597 if (number_of_faces[i-1]) { 2598 j++; 2599 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2600 } 2601 } 2602 } 2603 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2604 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2605 ierr = PetscFree(array_int);CHKERRQ(ierr); 2606 if (rank_prec_comm == master_proc) { 2607 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2608 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2609 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2610 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2611 } 2612 2613 if ( rank_prec_comm == master_proc ) { 2614 2615 PetscInt heuristic_for_metis=3; 2616 2617 ncon=1; 2618 faces_nvtxs=n_subdomains; 2619 /* partition graoh induced by face connectivity */ 2620 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2621 ierr = METIS_SetDefaultOptions(options); 2622 /* we need a contiguous partition of the coarse mesh */ 2623 options[METIS_OPTION_CONTIG]=1; 2624 options[METIS_OPTION_NITER]=30; 2625 if (pcbddc->coarsening_ratio > 1) { 2626 if (n_subdomains>n_parts*heuristic_for_metis) { 2627 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2628 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2629 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2630 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2631 } else { 2632 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2633 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2634 } 2635 } else { 2636 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2637 } 2638 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2639 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2640 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2641 2642 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2643 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2644 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2645 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2646 } 2647 2648 /* Create new communicator for coarse problem splitting the old one */ 2649 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2650 coarse_color=0; /* for communicator splitting */ 2651 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2652 } 2653 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2654 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2655 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2656 2657 if ( coarse_color == 0 ) { 2658 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2659 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2660 } else { 2661 rank_coarse_comm = MPI_PROC_NULL; 2662 } 2663 2664 /* master proc take care of arranging and distributing coarse information */ 2665 if (rank_coarse_comm == master_proc) { 2666 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2667 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2668 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2669 /* some initializations */ 2670 displacements_recv[0]=0; 2671 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2672 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2673 for (j=0;j<size_coarse_comm;j++) { 2674 for (i=0;i<size_prec_comm;i++) { 2675 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2676 } 2677 } 2678 /* displacements needed for scatterv of total_ranks_recv */ 2679 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2680 2681 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2682 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2683 for (j=0;j<size_coarse_comm;j++) { 2684 for (i=0;i<size_prec_comm;i++) { 2685 if (coarse_subdivision[i]==j) { 2686 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2687 total_count_recv[j]+=1; 2688 } 2689 } 2690 } 2691 /*for (j=0;j<size_coarse_comm;j++) { 2692 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2693 for (i=0;i<total_count_recv[j];i++) { 2694 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2695 } 2696 printf("\n"); 2697 }*/ 2698 2699 /* identify new decomposition in terms of ranks in the old communicator */ 2700 for (i=0;i<n_subdomains;i++) { 2701 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2702 } 2703 /*printf("coarse_subdivision in old end new ranks\n"); 2704 for (i=0;i<size_prec_comm;i++) 2705 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2706 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2707 } else { 2708 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2709 } 2710 printf("\n");*/ 2711 } 2712 2713 /* Scatter new decomposition for send details */ 2714 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2715 /* Scatter receiving details to members of coarse decomposition */ 2716 if ( coarse_color == 0) { 2717 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2718 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2719 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2720 } 2721 2722 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2723 if (coarse_color == 0) { 2724 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2725 for (i=0;i<count_recv;i++) 2726 printf("%d ",ranks_recv[i]); 2727 printf("\n"); 2728 }*/ 2729 2730 if (rank_prec_comm == master_proc) { 2731 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2732 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2733 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2734 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2735 } 2736 #endif 2737 break; 2738 } 2739 2740 case(REPLICATED_BDDC): 2741 2742 pcbddc->coarse_communications_type = GATHERS_BDDC; 2743 coarse_mat_type = MATSEQAIJ; 2744 coarse_pc_type = PCLU; 2745 coarse_ksp_type = KSPPREONLY; 2746 coarse_comm = PETSC_COMM_SELF; 2747 active_rank = rank_prec_comm; 2748 break; 2749 2750 case(PARALLEL_BDDC): 2751 2752 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2753 coarse_mat_type = MATAIJ; 2754 coarse_pc_type = PCREDUNDANT; 2755 coarse_ksp_type = KSPPREONLY; 2756 coarse_comm = prec_comm; 2757 active_rank = rank_prec_comm; 2758 break; 2759 2760 case(SEQUENTIAL_BDDC): 2761 pcbddc->coarse_communications_type = GATHERS_BDDC; 2762 coarse_mat_type = MATAIJ; 2763 coarse_pc_type = PCLU; 2764 coarse_ksp_type = KSPPREONLY; 2765 coarse_comm = PETSC_COMM_SELF; 2766 active_rank = master_proc; 2767 break; 2768 } 2769 2770 switch(pcbddc->coarse_communications_type){ 2771 2772 case(SCATTERS_BDDC): 2773 { 2774 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2775 2776 IS coarse_IS; 2777 2778 if(pcbddc->coarsening_ratio == 1) { 2779 ins_local_primal_size = pcbddc->local_primal_size; 2780 ins_local_primal_indices = pcbddc->local_primal_indices; 2781 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2782 /* nonzeros */ 2783 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2784 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2785 for (i=0;i<ins_local_primal_size;i++) { 2786 dnz[i] = ins_local_primal_size; 2787 } 2788 } else { 2789 PetscMPIInt send_size; 2790 PetscMPIInt *send_buffer; 2791 PetscInt *aux_ins_indices; 2792 PetscInt ii,jj; 2793 MPI_Request *requests; 2794 2795 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2796 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2797 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2798 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2799 pcbddc->replicated_primal_size = count_recv; 2800 j = 0; 2801 for (i=0;i<count_recv;i++) { 2802 pcbddc->local_primal_displacements[i] = j; 2803 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2804 } 2805 pcbddc->local_primal_displacements[count_recv] = j; 2806 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2807 /* allocate auxiliary space */ 2808 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2809 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2810 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2811 /* allocate stuffs for message massing */ 2812 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2813 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2814 /* send indices to be inserted */ 2815 for (i=0;i<count_recv;i++) { 2816 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2817 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2818 } 2819 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2820 send_size = pcbddc->local_primal_size; 2821 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2822 for (i=0;i<send_size;i++) { 2823 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2824 } 2825 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2826 } 2827 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2828 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2829 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2830 } 2831 j = 0; 2832 for (i=0;i<count_recv;i++) { 2833 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2834 localsizes2[i] = ii*ii; 2835 localdispl2[i] = j; 2836 j += localsizes2[i]; 2837 jj = pcbddc->local_primal_displacements[i]; 2838 /* it counts the coarse subdomains sharing the coarse node */ 2839 for (k=0;k<ii;k++) { 2840 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2841 } 2842 } 2843 /* temp_coarse_mat_vals used to store matrix values to be received */ 2844 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2845 /* evaluate how many values I will insert in coarse mat */ 2846 ins_local_primal_size = 0; 2847 for (i=0;i<coarse_size;i++) { 2848 if (aux_ins_indices[i]) { 2849 ins_local_primal_size++; 2850 } 2851 } 2852 /* evaluate indices I will insert in coarse mat */ 2853 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2854 j = 0; 2855 for(i=0;i<coarse_size;i++) { 2856 if(aux_ins_indices[i]) { 2857 ins_local_primal_indices[j] = i; 2858 j++; 2859 } 2860 } 2861 /* processes partecipating in coarse problem receive matrix data from their friends */ 2862 for (i=0;i<count_recv;i++) { 2863 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2864 } 2865 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2866 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2867 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2868 } 2869 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2870 /* nonzeros */ 2871 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2872 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2873 /* use aux_ins_indices to realize a global to local mapping */ 2874 j=0; 2875 for(i=0;i<coarse_size;i++){ 2876 if(aux_ins_indices[i]==0){ 2877 aux_ins_indices[i]=-1; 2878 } else { 2879 aux_ins_indices[i]=j; 2880 j++; 2881 } 2882 } 2883 for (i=0;i<count_recv;i++) { 2884 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2885 for (k=0;k<j;k++) { 2886 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2887 } 2888 } 2889 /* check */ 2890 for (i=0;i<ins_local_primal_size;i++) { 2891 if (dnz[i] > ins_local_primal_size) { 2892 dnz[i] = ins_local_primal_size; 2893 } 2894 } 2895 ierr = PetscFree(requests);CHKERRQ(ierr); 2896 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2897 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2898 } 2899 /* create local to global mapping needed by coarse MATIS */ 2900 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2901 coarse_comm = prec_comm; 2902 active_rank = rank_prec_comm; 2903 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2904 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2905 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2906 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2907 /* arrays for values insertion */ 2908 ins_local_primal_size = pcbddc->local_primal_size; 2909 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2910 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2911 for (j=0;j<ins_local_primal_size;j++){ 2912 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2913 for (i=0;i<ins_local_primal_size;i++) { 2914 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2915 } 2916 } 2917 } 2918 break; 2919 2920 } 2921 2922 case(GATHERS_BDDC): 2923 { 2924 2925 PetscMPIInt mysize,mysize2; 2926 PetscMPIInt *send_buffer; 2927 2928 if (rank_prec_comm==active_rank) { 2929 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2930 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 2931 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2932 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2933 /* arrays for values insertion */ 2934 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2935 localdispl2[0]=0; 2936 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2937 j=0; 2938 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2939 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2940 } 2941 2942 mysize=pcbddc->local_primal_size; 2943 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2944 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2945 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2946 2947 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2948 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2949 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2950 } else { 2951 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2952 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2953 } 2954 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2955 break; 2956 }/* switch on coarse problem and communications associated with finished */ 2957 } 2958 2959 /* Now create and fill up coarse matrix */ 2960 if ( rank_prec_comm == active_rank ) { 2961 2962 Mat matis_coarse_local_mat; 2963 2964 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2965 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2966 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2967 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2968 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2969 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2970 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2971 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2972 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2973 } else { 2974 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2975 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2976 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2977 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2978 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2979 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2980 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2981 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2982 } 2983 /* preallocation */ 2984 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2985 2986 PetscInt lrows,lcols,bs; 2987 2988 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2989 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2990 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 2991 2992 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2993 2994 Vec vec_dnz,vec_onz; 2995 PetscScalar *my_dnz,*my_onz,*array; 2996 PetscInt *mat_ranges,*row_ownership; 2997 PetscInt coarse_index_row,coarse_index_col,owner; 2998 2999 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 3000 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 3001 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 3002 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 3003 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 3004 3005 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 3006 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 3007 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3008 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3009 3010 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 3011 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 3012 for (i=0;i<size_prec_comm;i++) { 3013 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 3014 row_ownership[j]=i; 3015 } 3016 } 3017 3018 for (i=0;i<pcbddc->local_primal_size;i++) { 3019 coarse_index_row = pcbddc->local_primal_indices[i]; 3020 owner = row_ownership[coarse_index_row]; 3021 for (j=i;j<pcbddc->local_primal_size;j++) { 3022 owner = row_ownership[coarse_index_row]; 3023 coarse_index_col = pcbddc->local_primal_indices[j]; 3024 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3025 my_dnz[i] += 1.0; 3026 } else { 3027 my_onz[i] += 1.0; 3028 } 3029 if (i != j) { 3030 owner = row_ownership[coarse_index_col]; 3031 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3032 my_dnz[j] += 1.0; 3033 } else { 3034 my_onz[j] += 1.0; 3035 } 3036 } 3037 } 3038 } 3039 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3040 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3041 if (pcbddc->local_primal_size) { 3042 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3043 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3044 } 3045 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3046 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3047 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3048 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3049 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3050 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3051 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3052 3053 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3054 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3055 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3056 3057 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3058 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3059 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3060 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3061 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3062 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3063 } else { 3064 for (k=0;k<size_prec_comm;k++){ 3065 offset=pcbddc->local_primal_displacements[k]; 3066 offset2=localdispl2[k]; 3067 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3068 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3069 for (j=0;j<ins_local_primal_size;j++){ 3070 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3071 } 3072 for (j=0;j<ins_local_primal_size;j++) { 3073 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3074 } 3075 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3076 } 3077 } 3078 3079 /* check */ 3080 for (i=0;i<lrows;i++) { 3081 if (dnz[i]>lcols) dnz[i]=lcols; 3082 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 3083 } 3084 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 3085 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3086 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3087 } else { 3088 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3089 ierr = PetscFree(dnz);CHKERRQ(ierr); 3090 } 3091 /* insert values */ 3092 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3093 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3094 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3095 if (pcbddc->coarsening_ratio == 1) { 3096 ins_coarse_mat_vals = coarse_submat_vals; 3097 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3098 } else { 3099 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3100 for (k=0;k<pcbddc->replicated_primal_size;k++) { 3101 offset = pcbddc->local_primal_displacements[k]; 3102 offset2 = localdispl2[k]; 3103 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 3104 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3105 for (j=0;j<ins_local_primal_size;j++){ 3106 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3107 } 3108 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3109 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3110 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3111 } 3112 } 3113 ins_local_primal_indices = 0; 3114 ins_coarse_mat_vals = 0; 3115 } else { 3116 for (k=0;k<size_prec_comm;k++){ 3117 offset=pcbddc->local_primal_displacements[k]; 3118 offset2=localdispl2[k]; 3119 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3120 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3121 for (j=0;j<ins_local_primal_size;j++){ 3122 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3123 } 3124 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3125 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3126 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3127 } 3128 ins_local_primal_indices = 0; 3129 ins_coarse_mat_vals = 0; 3130 } 3131 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 /* symmetry of coarse matrix */ 3134 if (issym) { 3135 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3136 } 3137 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3138 } 3139 3140 /* create loc to glob scatters if needed */ 3141 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3142 IS local_IS,global_IS; 3143 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3144 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3145 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3146 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3147 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3148 } 3149 3150 /* free memory no longer needed */ 3151 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3152 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3153 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3154 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3155 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3156 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3157 3158 /* Compute coarse null space */ 3159 CoarseNullSpace = 0; 3160 if (pcbddc->NullSpace) { 3161 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3162 } 3163 3164 /* KSP for coarse problem */ 3165 if (rank_prec_comm == active_rank) { 3166 PetscBool isbddc=PETSC_FALSE; 3167 3168 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3169 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3170 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3171 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3172 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3173 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3174 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3175 /* Allow user's customization */ 3176 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3177 /* Set Up PC for coarse problem BDDC */ 3178 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3179 i = pcbddc->current_level+1; 3180 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3181 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3182 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3183 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3184 if (CoarseNullSpace) { 3185 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3186 } 3187 if (dbg_flag) { 3188 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3190 } 3191 } else { 3192 if (CoarseNullSpace) { 3193 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3194 } 3195 } 3196 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3197 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3198 3199 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3200 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3201 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3202 if (j == 1) { 3203 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3204 if (isbddc) { 3205 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3206 } 3207 } 3208 } 3209 /* Check coarse problem if requested */ 3210 if ( dbg_flag && rank_prec_comm == active_rank ) { 3211 KSP check_ksp; 3212 PC check_pc; 3213 Vec check_vec; 3214 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3215 KSPType check_ksp_type; 3216 3217 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3218 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3219 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3220 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3221 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3222 if (issym) check_ksp_type = KSPCG; 3223 else check_ksp_type = KSPGMRES; 3224 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3225 } else { 3226 check_ksp_type = KSPPREONLY; 3227 } 3228 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3229 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3230 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3231 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3232 /* create random vec */ 3233 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3234 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3235 if (CoarseNullSpace) { 3236 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3237 } 3238 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3239 /* solve coarse problem */ 3240 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3241 if (CoarseNullSpace) { 3242 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3243 } 3244 /* check coarse problem residual error */ 3245 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3246 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3247 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3248 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3249 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3250 /* get eigenvalue estimation if inexact */ 3251 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3252 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3253 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3254 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3255 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3256 } 3257 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3258 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3259 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3260 } 3261 if (dbg_flag) { 3262 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3263 } 3264 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3265 3266 PetscFunctionReturn(0); 3267 } 3268 3269