1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscErrorCode ierr; 11 12 PetscFunctionBegin; 13 /* Compute matrix after change of basis and extract local submatrices */ 14 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 15 16 /* Allocate needed vectors */ 17 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 18 19 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 20 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 21 22 /* Setup local solvers ksp_D and ksp_R */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* setup local correction and local part of coarse basis */ 31 ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNCT__ 36 #define __FUNCT__ "PCBDDCSetLevel" 37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 38 { 39 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 40 41 PetscFunctionBegin; 42 pcbddc->current_level=level; 43 PetscFunctionReturn(0); 44 } 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "PCBDDCResetCustomization" 48 PetscErrorCode PCBDDCResetCustomization(PC pc) 49 { 50 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 51 PetscInt i; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 60 for (i=0;i<pcbddc->n_ISForDofs;i++) { 61 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 62 } 63 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCCreateWorkVectors" 118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 128 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 129 local_primal_size = n_constraints+n_vertices; 130 n_R = pcis->n-n_vertices; 131 /* local work vectors */ 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 137 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 138 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 139 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 140 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 141 if (n_constraints) { 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 145 } 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc) 152 { 153 PetscErrorCode ierr; 154 /* pointers to pcis and pcbddc */ 155 PC_IS* pcis = (PC_IS*)pc->data; 156 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 157 /* submatrices of local problem */ 158 Mat A_RV,A_VR,A_VV; 159 /* working matrices */ 160 Mat M1,M2,M3,C_CR; 161 /* working vectors */ 162 Vec vec1_C,vec2_C,vec1_V,vec2_V; 163 /* additional working stuff */ 164 IS is_aux; 165 ISLocalToGlobalMapping BtoNmap; 166 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 167 const PetscScalar *array,*row_cmat_values; 168 const PetscInt *row_cmat_indices,*idx_R_local; 169 PetscInt *vertices,*idx_V_B,*auxindices; 170 PetscInt n_vertices,n_constraints,size_of_constraint; 171 PetscInt i,j,n_R,n_D,n_B; 172 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 173 /* Vector and matrix types */ 174 VecType impVecType; 175 MatType impMatType; 176 /* some shortcuts to scalars */ 177 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 178 /* for debugging purposes */ 179 PetscReal *coarsefunctions_errors,*constraints_errors; 180 181 PetscFunctionBegin; 182 /* get number of vertices and their local indices */ 183 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 184 n_constraints = pcbddc->local_primal_size-n_vertices; 185 /* Set Non-overlapping dimensions */ 186 n_B = pcis->n_B; n_D = pcis->n - n_B; 187 n_R = pcis->n-n_vertices; 188 189 /* Set types for local objects needed by BDDC precondtioner */ 190 impMatType = MATSEQDENSE; 191 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 192 193 /* Allocating some extra storage just to be safe */ 194 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 195 for (i=0;i<pcis->n;i++) auxindices[i]=i; 196 197 /* vertices in boundary numbering */ 198 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 199 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 200 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 202 if (i != n_vertices) { 203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 204 } 205 206 /* some work vectors on vertices and/or constraints */ 207 if (n_vertices) { 208 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 209 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 210 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 211 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 212 } 213 if (n_constraints) { 214 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 215 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 216 } 217 218 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 219 if (n_constraints) { 220 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 221 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 222 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 223 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 224 225 /* Extract constraints on R nodes: C_{CR} */ 226 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 227 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 228 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 229 230 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 231 for (i=0;i<n_constraints;i++) { 232 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 233 /* Get row of constraint matrix in R numbering */ 234 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 235 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 237 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 238 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 239 /* Solve for row of constraint matrix in R numbering */ 240 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 241 /* Set values in local_auxmat2 */ 242 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 243 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 245 } 246 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 249 250 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 251 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 252 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 253 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 254 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 255 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 256 ierr = MatSetUp(M1);CHKERRQ(ierr); 257 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 258 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 259 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 260 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 261 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 262 ierr = MatDestroy(&M2);CHKERRQ(ierr); 263 ierr = MatDestroy(&M3);CHKERRQ(ierr); 264 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 265 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 266 } 267 268 /* Get submatrices from subdomain matrix */ 269 if (n_vertices) { 270 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 271 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 273 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 274 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 275 } 276 277 /* Matrix of coarse basis functions (local) */ 278 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 279 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 280 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 281 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 282 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 283 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 284 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 285 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 286 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 287 } 288 289 if (pcbddc->dbg_flag) { 290 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 291 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 292 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 293 } 294 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 295 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 296 297 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 298 299 /* vertices */ 300 for (i=0;i<n_vertices;i++) { 301 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 302 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 303 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 304 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 305 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 306 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 307 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 308 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 309 if (n_constraints) { 310 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 311 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 313 } 314 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 315 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 316 317 /* Set values in coarse basis function and subdomain part of coarse_mat */ 318 /* coarse basis functions */ 319 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 320 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 322 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 323 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 325 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 326 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 327 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 328 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 329 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 330 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 331 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 332 } 333 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 334 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 335 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 337 if (n_constraints) { 338 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 339 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 341 } 342 343 /* check */ 344 if (pcbddc->dbg_flag) { 345 /* assemble subdomain vector on local nodes */ 346 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 350 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 351 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 352 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 353 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 354 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 355 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 356 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 357 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 358 if (n_constraints) { 359 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 360 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 362 } 363 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 364 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 365 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 366 /* check saddle point solution */ 367 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 368 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 369 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 370 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 371 /* shift by the identity matrix */ 372 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 373 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 374 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 376 } 377 } 378 379 /* constraints */ 380 for (i=0;i<n_constraints;i++) { 381 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 382 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 383 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 384 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 385 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 386 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 387 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 if (n_vertices) { 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 } 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 j = i+n_vertices; /* don't touch this! */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 if (n_vertices) { 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 } 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 if (n_vertices) { 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 } 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 449 } 450 } 451 /* call assembling routines for local coarse basis */ 452 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 455 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 } 458 459 /* compute other basis functions for non-symmetric problems */ 460 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 461 if (!setsym || (setsym && !issym)) { 462 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 463 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 464 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 465 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 466 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 467 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 468 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 470 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 471 } 472 for (i=0;i<pcbddc->local_primal_size;i++) { 473 if (n_constraints) { 474 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 475 for (j=0;j<n_constraints;j++) { 476 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 477 } 478 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 479 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 480 } 481 if (i<n_vertices) { 482 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 483 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 485 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 486 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 487 if (n_constraints) { 488 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 489 } 490 } else { 491 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 492 } 493 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 494 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 498 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 500 if (i<n_vertices) { 501 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 502 } 503 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) { 504 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 507 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 509 } 510 511 if (pcbddc->dbg_flag) { 512 /* assemble subdomain vector on nodes */ 513 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 514 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 515 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 516 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 517 if (i<n_vertices) { 518 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 519 } 520 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 522 /* assemble subdomain vector of lagrange multipliers */ 523 for (j=0;j<pcbddc->local_primal_size;j++) { 524 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 527 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 528 /* check saddle point solution */ 529 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 530 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 531 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 532 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 533 /* shift by the identity matrix */ 534 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 535 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 537 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 538 } 539 } 540 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 541 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 542 if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 543 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 } 546 } 547 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 548 /* Checking coarse_sub_mat and coarse basis functios */ 549 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 550 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 551 if (pcbddc->dbg_flag) { 552 Mat coarse_sub_mat; 553 Mat AUXMAT,TM1,TM2,TM3,TM4; 554 Mat coarse_phi_D,coarse_phi_B; 555 Mat coarse_psi_D,coarse_psi_B; 556 Mat A_II,A_BB,A_IB,A_BI; 557 MatType checkmattype=MATSEQAIJ; 558 PetscReal real_value; 559 560 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 561 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 562 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 563 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 564 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 565 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 566 if (pcbddc->coarse_psi_B) { 567 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 569 } 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 571 572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 574 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 575 if (pcbddc->coarse_psi_B) { 576 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 577 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 579 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 580 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 582 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 583 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 585 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 586 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 588 } else { 589 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 590 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 591 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 } 598 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 599 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 600 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 601 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 602 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 608 for (i=0;i<pcbddc->local_primal_size;i++) { 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 610 } 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 612 for (i=0;i<pcbddc->local_primal_size;i++) { 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 614 } 615 if (pcbddc->coarse_psi_B) { 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 617 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 619 } 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 621 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 623 } 624 } 625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 626 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 627 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 628 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 629 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 630 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 631 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 632 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 633 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 634 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 635 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 636 if (pcbddc->coarse_psi_B) { 637 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 639 } 640 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 641 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 642 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 643 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 644 } 645 /* free memory */ 646 if (n_vertices) { 647 ierr = PetscFree(vertices);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 649 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 652 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 653 } 654 if (n_constraints) { 655 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 656 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 657 ierr = MatDestroy(&M1);CHKERRQ(ierr); 658 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 659 } 660 ierr = PetscFree(auxindices);CHKERRQ(ierr); 661 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 662 ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 663 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 670 { 671 PC_IS* pcis = (PC_IS*)(pc->data); 672 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 673 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 674 /* manage repeated solves */ 675 MatReuse reuse; 676 MatStructure matstruct; 677 PetscErrorCode ierr; 678 679 PetscFunctionBegin; 680 /* get mat flags */ 681 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 682 reuse = MAT_INITIAL_MATRIX; 683 if (pc->setupcalled) { 684 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 685 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 686 if (matstruct == SAME_NONZERO_PATTERN) { 687 reuse = MAT_REUSE_MATRIX; 688 } else { 689 reuse = MAT_INITIAL_MATRIX; 690 } 691 } 692 if (reuse == MAT_INITIAL_MATRIX) { 693 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 695 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 698 } 699 700 /* transform local matrices if needed */ 701 if (pcbddc->use_change_of_basis) { 702 Mat change_mat_all; 703 PetscScalar *row_cmat_values; 704 PetscInt *row_cmat_indices; 705 PetscInt *nnz,*is_indices,*temp_indices; 706 PetscInt i,j,k,n_D,n_B; 707 708 /* Get Non-overlapping dimensions */ 709 n_B = pcis->n_B; 710 n_D = pcis->n-n_B; 711 712 /* compute nonzero structure of change of basis on all local nodes */ 713 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 714 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 715 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 716 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 718 k=1; 719 for (i=0;i<n_B;i++) { 720 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 721 nnz[is_indices[i]]=j; 722 if (k < j) k = j; 723 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 724 } 725 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 /* assemble change of basis matrix on the whole set of local dofs */ 727 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 728 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 729 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 730 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 731 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 for (i=0;i<n_D;i++) { 734 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 735 } 736 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 738 for (i=0;i<n_B;i++) { 739 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 740 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 741 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 742 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 743 } 744 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 747 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 748 if (i==1) { 749 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 750 } else { 751 Mat work_mat; 752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 753 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 755 } 756 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 757 ierr = PetscFree(nnz);CHKERRQ(ierr); 758 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 759 } else { 760 /* without change of basis, the local matrix is unchanged */ 761 if (!pcbddc->local_mat) { 762 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 763 pcbddc->local_mat = matis->A; 764 } 765 } 766 767 /* get submatrices */ 768 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 769 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 770 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 778 { 779 PC_IS* pcis = (PC_IS*)(pc->data); 780 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 781 IS is_aux1,is_aux2; 782 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 783 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 784 PetscBool *array_bool; 785 PetscErrorCode ierr; 786 787 PetscFunctionBegin; 788 /* Set Non-overlapping dimensions */ 789 n_B = pcis->n_B; n_D = pcis->n - n_B; 790 /* get vertex indices from constraint matrix */ 791 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 792 /* Set number of constraints */ 793 n_constraints = pcbddc->local_primal_size-n_vertices; 794 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 795 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&array_bool);CHKERRQ(ierr); 796 for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_TRUE; 797 for (i=0;i<n_vertices;i++) array_bool[vertices[i]] = PETSC_FALSE; 798 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 799 for (i=0, n_R=0; i<pcis->n; i++) { 800 if (array_bool[i]) { 801 idx_R_local[n_R] = i; 802 n_R++; 803 } 804 } 805 ierr = PetscFree(vertices);CHKERRQ(ierr); 806 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 807 808 /* print some info if requested */ 809 if (pcbddc->dbg_flag) { 810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 811 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 817 } 818 819 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 820 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 821 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0; i<n_D; i++) array_bool[is_indices[i]] = PETSC_FALSE; 824 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0, j=0; i<n_R; i++) { 826 if (array_bool[idx_R_local[i]]) { 827 aux_array1[j] = i; 828 j++; 829 } 830 } 831 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 832 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 for (i=0, j=0; i<n_B; i++) { 834 if (array_bool[is_indices[i]]) { 835 aux_array2[j] = i; j++; 836 } 837 } 838 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 839 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 840 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 841 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 842 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 843 844 if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) { 845 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 846 for (i=0, j=0; i<n_R; i++) { 847 if (!array_bool[idx_R_local[i]]) { 848 aux_array1[j] = i; 849 j++; 850 } 851 } 852 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 853 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 854 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 855 } 856 ierr = PetscFree(array_bool);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 862 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 863 { 864 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 865 866 PetscFunctionBegin; 867 pcbddc->use_exact_dirichlet=use; 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 873 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 874 { 875 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 876 PC_IS *pcis = (PC_IS*)pc->data; 877 PC pc_temp; 878 Mat A_RR; 879 Vec vec1,vec2,vec3; 880 MatStructure matstruct; 881 PetscScalar m_one = -1.0; 882 PetscReal value; 883 PetscInt n_D,n_R,use_exact,use_exact_reduced; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* Creating PC contexts for local Dirichlet and Neumann problems */ 888 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 889 890 /* DIRICHLET PROBLEM */ 891 /* Matrix for Dirichlet problem is pcis->A_II */ 892 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 893 if (!pcbddc->ksp_D) { /* create object if not yet build */ 894 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 895 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 896 /* default */ 897 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 898 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 899 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 900 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 901 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 902 } 903 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 904 /* Allow user's customization */ 905 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 906 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 907 if (!n_D) { 908 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 909 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 910 } 911 /* Set Up KSP for Dirichlet problem of BDDC */ 912 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 913 /* set ksp_D into pcis data */ 914 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 915 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 916 pcis->ksp_D = pcbddc->ksp_D; 917 918 /* NEUMANN PROBLEM */ 919 /* Matrix for Neumann problem is A_RR -> we need to create it */ 920 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 921 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 922 if (!pcbddc->ksp_R) { /* create object if not yet build */ 923 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 924 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 925 /* default */ 926 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 927 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 928 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 929 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 930 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 931 } 932 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 933 /* Allow user's customization */ 934 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 935 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 936 if (!n_R) { 937 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 938 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 939 } 940 /* Set Up KSP for Neumann problem of BDDC */ 941 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 942 943 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 944 945 /* Dirichlet */ 946 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 947 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 948 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 949 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 950 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 951 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 952 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 953 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 954 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 955 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 956 /* need to be adapted? */ 957 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 958 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 959 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 960 /* print info */ 961 if (pcbddc->dbg_flag) { 962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 963 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 } 968 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 969 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 970 } 971 972 /* Neumann */ 973 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 974 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 975 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 976 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 977 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 978 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 979 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 980 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 981 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 982 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 983 /* need to be adapted? */ 984 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 985 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 986 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 987 /* print info */ 988 if (pcbddc->dbg_flag) { 989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 991 } 992 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 993 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 994 } 995 996 /* free Neumann problem's matrix */ 997 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1003 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1004 { 1005 PetscErrorCode ierr; 1006 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1007 1008 PetscFunctionBegin; 1009 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1010 if (pcbddc->local_auxmat1) { 1011 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1012 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1019 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1020 { 1021 PetscErrorCode ierr; 1022 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1023 PC_IS* pcis = (PC_IS*) (pc->data); 1024 const PetscScalar zero = 0.0; 1025 1026 PetscFunctionBegin; 1027 /* Application of PHI^T (or PSI^T) */ 1028 if (pcbddc->coarse_psi_B) { 1029 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1030 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1031 } else { 1032 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1033 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1034 } 1035 /* Scatter data of coarse_rhs */ 1036 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1037 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 1039 /* Local solution on R nodes */ 1040 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1041 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1042 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1043 if (pcbddc->inexact_prec_type) { 1044 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1046 } 1047 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1048 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1049 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 if (pcbddc->inexact_prec_type) { 1052 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 } 1055 1056 /* Coarse solution */ 1057 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1059 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1060 } 1061 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1062 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1063 1064 /* Sum contributions from two levels */ 1065 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1066 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1072 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1073 { 1074 PetscErrorCode ierr; 1075 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1076 1077 PetscFunctionBegin; 1078 switch (pcbddc->coarse_communications_type) { 1079 case SCATTERS_BDDC: 1080 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1081 break; 1082 case GATHERS_BDDC: 1083 break; 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1090 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1091 { 1092 PetscErrorCode ierr; 1093 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1094 PetscScalar* array_to; 1095 PetscScalar* array_from; 1096 MPI_Comm comm; 1097 PetscInt i; 1098 1099 PetscFunctionBegin; 1100 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1101 switch (pcbddc->coarse_communications_type) { 1102 case SCATTERS_BDDC: 1103 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1104 break; 1105 case GATHERS_BDDC: 1106 if (vec_from) { 1107 ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); 1108 } 1109 if (vec_to) { 1110 ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); 1111 } 1112 switch(pcbddc->coarse_problem_type){ 1113 case SEQUENTIAL_BDDC: 1114 if (smode == SCATTER_FORWARD) { 1115 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1116 if (vec_to) { 1117 if (imode == ADD_VALUES) { 1118 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1119 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1120 } 1121 } else { 1122 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1123 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1124 } 1125 } 1126 } 1127 } else { 1128 if (vec_from) { 1129 if (imode == ADD_VALUES) { 1130 MPI_Comm vec_from_comm; 1131 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr); 1132 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type); 1133 } 1134 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1135 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 1136 } 1137 } 1138 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 1139 } 1140 break; 1141 case REPLICATED_BDDC: 1142 if (smode == SCATTER_FORWARD) { 1143 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 1144 if (imode == ADD_VALUES) { 1145 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1146 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 1147 } 1148 } else { 1149 for (i=0;i<pcbddc->replicated_primal_size;i++) { 1150 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 1151 } 1152 } 1153 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 1154 if (imode == ADD_VALUES) { 1155 for (i=0;i<pcbddc->local_primal_size;i++) { 1156 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 1157 } 1158 } else { 1159 for (i=0;i<pcbddc->local_primal_size;i++) { 1160 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 1161 } 1162 } 1163 } 1164 break; 1165 case MULTILEVEL_BDDC: 1166 break; 1167 case PARALLEL_BDDC: 1168 break; 1169 } 1170 if (vec_from) { 1171 ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); 1172 } 1173 if (vec_to) { 1174 ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); 1175 } 1176 break; 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 /* uncomment for testing purposes */ 1182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1186 { 1187 PetscErrorCode ierr; 1188 PC_IS* pcis = (PC_IS*)(pc->data); 1189 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1191 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1192 MatType impMatType=MATSEQAIJ; 1193 /* one and zero */ 1194 PetscScalar one=1.0,zero=0.0; 1195 /* space to store constraints and their local indices */ 1196 PetscScalar *temp_quadrature_constraint; 1197 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1198 /* iterators */ 1199 PetscInt i,j,k,total_counts,temp_start_ptr; 1200 /* stuff to store connected components stored in pcbddc->mat_graph */ 1201 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1202 PetscInt n_ISForFaces,n_ISForEdges; 1203 PetscBool get_faces,get_edges,get_vertices; 1204 /* near null space stuff */ 1205 MatNullSpace nearnullsp; 1206 const Vec *nearnullvecs; 1207 Vec *localnearnullsp; 1208 PetscBool nnsp_has_cnst; 1209 PetscInt nnsp_size; 1210 PetscScalar *array; 1211 /* BLAS integers */ 1212 PetscBLASInt lwork,lierr; 1213 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1214 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1215 /* LAPACK working arrays for SVD or POD */ 1216 PetscBool skip_lapack; 1217 PetscScalar *work; 1218 PetscReal *singular_vals; 1219 #if defined(PETSC_USE_COMPLEX) 1220 PetscReal *rwork; 1221 #endif 1222 #if defined(PETSC_MISSING_LAPACK_GESVD) 1223 PetscBLASInt Blas_one_2=1; 1224 PetscScalar *temp_basis,*correlation_mat; 1225 #else 1226 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1227 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1228 #endif 1229 /* change of basis */ 1230 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1231 PetscBool boolforchange,*change_basis,*touched; 1232 /* auxiliary stuff */ 1233 PetscInt *nnz,*is_indices,*local_to_B; 1234 /* some quantities */ 1235 PetscInt n_vertices,total_primal_vertices; 1236 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1237 1238 1239 PetscFunctionBegin; 1240 /* Get index sets for faces, edges and vertices from graph */ 1241 get_faces = PETSC_TRUE; 1242 get_edges = PETSC_TRUE; 1243 get_vertices = PETSC_TRUE; 1244 if (pcbddc->vertices_flag) { 1245 get_faces = PETSC_FALSE; 1246 get_edges = PETSC_FALSE; 1247 } 1248 if (pcbddc->constraints_flag) { 1249 get_vertices = PETSC_FALSE; 1250 } 1251 if (pcbddc->faces_flag) { 1252 get_edges = PETSC_FALSE; 1253 } 1254 if (pcbddc->edges_flag) { 1255 get_faces = PETSC_FALSE; 1256 } 1257 /* default */ 1258 if (!get_faces && !get_edges && !get_vertices) { 1259 get_vertices = PETSC_TRUE; 1260 } 1261 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1262 /* print some info */ 1263 if (pcbddc->dbg_flag) { 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1265 i = 0; 1266 if (ISForVertices) { 1267 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1268 } 1269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1273 } 1274 /* check if near null space is attached to global mat */ 1275 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1276 if (nearnullsp) { 1277 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1278 } else { /* if near null space is not provided BDDC uses constants by default */ 1279 nnsp_size = 0; 1280 nnsp_has_cnst = PETSC_TRUE; 1281 } 1282 /* get max number of constraints on a single cc */ 1283 max_constraints = nnsp_size; 1284 if (nnsp_has_cnst) max_constraints++; 1285 1286 /* 1287 Evaluate maximum storage size needed by the procedure 1288 - temp_indices will contain start index of each constraint stored as follows 1289 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1290 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1291 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1292 */ 1293 total_counts = n_ISForFaces+n_ISForEdges; 1294 total_counts *= max_constraints; 1295 n_vertices = 0; 1296 if (ISForVertices) { 1297 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1298 } 1299 total_counts += n_vertices; 1300 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1301 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1302 total_counts = 0; 1303 max_size_of_constraint = 0; 1304 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1305 if (i<n_ISForEdges) { 1306 used_IS = &ISForEdges[i]; 1307 } else { 1308 used_IS = &ISForFaces[i-n_ISForEdges]; 1309 } 1310 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1311 total_counts += j; 1312 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1313 } 1314 total_counts *= max_constraints; 1315 total_counts += n_vertices; 1316 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1317 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1318 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1319 /* local to boundary numbering */ 1320 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1321 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1322 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1323 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1324 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 /* get local part of global near null space vectors */ 1326 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1327 for (k=0;k<nnsp_size;k++) { 1328 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1329 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1331 } 1332 1333 /* whether or not to skip lapack calls */ 1334 skip_lapack = PETSC_TRUE; 1335 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1336 1337 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1338 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1339 PetscScalar temp_work; 1340 #if defined(PETSC_MISSING_LAPACK_GESVD) 1341 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1342 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1343 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1344 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1345 #if defined(PETSC_USE_COMPLEX) 1346 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1347 #endif 1348 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1349 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1350 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1351 lwork = -1; 1352 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1353 #if !defined(PETSC_USE_COMPLEX) 1354 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1355 #else 1356 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1357 #endif 1358 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1359 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1360 #else /* on missing GESVD */ 1361 /* SVD */ 1362 PetscInt max_n,min_n; 1363 max_n = max_size_of_constraint; 1364 min_n = max_constraints; 1365 if (max_size_of_constraint < max_constraints) { 1366 min_n = max_size_of_constraint; 1367 max_n = max_constraints; 1368 } 1369 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1370 #if defined(PETSC_USE_COMPLEX) 1371 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1372 #endif 1373 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1374 lwork = -1; 1375 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1376 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1377 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1378 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1379 #if !defined(PETSC_USE_COMPLEX) 1380 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1381 #else 1382 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1383 #endif 1384 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1385 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1386 #endif /* on missing GESVD */ 1387 /* Allocate optimal workspace */ 1388 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1389 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1390 } 1391 /* Now we can loop on constraining sets */ 1392 total_counts = 0; 1393 temp_indices[0] = 0; 1394 /* vertices */ 1395 if (ISForVertices) { 1396 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1397 if (nnsp_has_cnst) { /* consider all vertices */ 1398 for (i=0;i<n_vertices;i++) { 1399 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1400 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1401 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1402 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1403 change_basis[total_counts]=PETSC_FALSE; 1404 total_counts++; 1405 } 1406 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1407 PetscBool used_vertex; 1408 for (i=0;i<n_vertices;i++) { 1409 used_vertex = PETSC_FALSE; 1410 k = 0; 1411 while (!used_vertex && k<nnsp_size) { 1412 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1413 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1414 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1415 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1416 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1417 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1418 change_basis[total_counts]=PETSC_FALSE; 1419 total_counts++; 1420 used_vertex = PETSC_TRUE; 1421 } 1422 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1423 k++; 1424 } 1425 } 1426 } 1427 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1428 n_vertices = total_counts; 1429 } 1430 1431 /* edges and faces */ 1432 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1433 if (i<n_ISForEdges) { 1434 used_IS = &ISForEdges[i]; 1435 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1436 } else { 1437 used_IS = &ISForFaces[i-n_ISForEdges]; 1438 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1439 } 1440 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1441 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1442 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1443 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1444 /* change of basis should not be performed on local periodic nodes */ 1445 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1446 if (nnsp_has_cnst) { 1447 PetscScalar quad_value; 1448 temp_constraints++; 1449 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1450 for (j=0;j<size_of_constraint;j++) { 1451 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1452 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1453 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1454 } 1455 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1456 change_basis[total_counts]=boolforchange; 1457 total_counts++; 1458 } 1459 for (k=0;k<nnsp_size;k++) { 1460 PetscReal real_value; 1461 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1462 for (j=0;j<size_of_constraint;j++) { 1463 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1464 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1465 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1466 } 1467 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1468 /* check if array is null on the connected component */ 1469 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1470 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1471 if (real_value > 0.0) { /* keep indices and values */ 1472 temp_constraints++; 1473 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1474 change_basis[total_counts]=boolforchange; 1475 total_counts++; 1476 } 1477 } 1478 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1479 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1480 if (!pcbddc->use_nnsp_true) { 1481 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1482 1483 #if defined(PETSC_MISSING_LAPACK_GESVD) 1484 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1485 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1486 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1487 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1488 from that computed using LAPACKgesvd 1489 -> This is due to a different computation of eigenvectors in LAPACKheev 1490 -> The quality of the POD-computed basis will be the same */ 1491 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1492 /* Store upper triangular part of correlation matrix */ 1493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1494 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1495 for (j=0;j<temp_constraints;j++) { 1496 for (k=0;k<j+1;k++) { 1497 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1498 } 1499 } 1500 /* compute eigenvalues and eigenvectors of correlation matrix */ 1501 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1502 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1503 #if !defined(PETSC_USE_COMPLEX) 1504 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1505 #else 1506 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1507 #endif 1508 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1510 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1511 j=0; 1512 while (j < temp_constraints && singular_vals[j] < tol) j++; 1513 total_counts=total_counts-j; 1514 /* scale and copy POD basis into used quadrature memory */ 1515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1516 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1517 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1519 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1521 if (j<temp_constraints) { 1522 PetscInt ii; 1523 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1524 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1525 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1527 for (k=0;k<temp_constraints-j;k++) { 1528 for (ii=0;ii<size_of_constraint;ii++) { 1529 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1530 } 1531 } 1532 } 1533 #else /* on missing GESVD */ 1534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1535 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1538 #if !defined(PETSC_USE_COMPLEX) 1539 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1540 #else 1541 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1542 #endif 1543 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1545 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1546 k = temp_constraints; 1547 if (k > size_of_constraint) k = size_of_constraint; 1548 j = 0; 1549 while (j < k && singular_vals[k-j-1] < tol) j++; 1550 total_counts = total_counts-temp_constraints+k-j; 1551 #endif /* on missing GESVD */ 1552 } 1553 } 1554 /* free index sets of faces, edges and vertices */ 1555 for (i=0;i<n_ISForFaces;i++) { 1556 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1559 for (i=0;i<n_ISForEdges;i++) { 1560 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1563 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1564 1565 /* free workspace */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 ierr = PetscFree(work);CHKERRQ(ierr); 1568 #if defined(PETSC_USE_COMPLEX) 1569 ierr = PetscFree(rwork);CHKERRQ(ierr); 1570 #endif 1571 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1572 #if defined(PETSC_MISSING_LAPACK_GESVD) 1573 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1574 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1575 #endif 1576 } 1577 for (k=0;k<nnsp_size;k++) { 1578 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1579 } 1580 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1581 1582 /* set quantities in pcbddc data structure */ 1583 /* n_vertices defines the number of subdomain corners in the primal space */ 1584 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1585 pcbddc->local_primal_size = total_counts; 1586 pcbddc->n_vertices = n_vertices; 1587 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1588 1589 /* Create constraint matrix */ 1590 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1591 /* so we need to set it up properly either with or without change of basis */ 1592 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1593 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1594 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1595 /* array to compute a local numbering of constraints : vertices first then constraints */ 1596 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1597 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1598 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1599 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1600 /* auxiliary stuff for basis change */ 1601 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1602 ierr = PetscMalloc(pcis->n_B*sizeof(PetscBool),&touched);CHKERRQ(ierr); 1603 ierr = PetscMemzero(touched,pcis->n_B*sizeof(PetscBool));CHKERRQ(ierr); 1604 1605 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1606 total_primal_vertices=0; 1607 for (i=0;i<pcbddc->local_primal_size;i++) { 1608 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1609 if (size_of_constraint == 1) { 1610 touched[temp_indices_to_constraint_B[temp_indices[i]]]=PETSC_TRUE; 1611 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1612 aux_primal_minloc[total_primal_vertices]=0; 1613 total_primal_vertices++; 1614 } else if (change_basis[i]) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1615 PetscInt min_loc,min_index; 1616 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1617 /* find first untouched local node */ 1618 k = 0; 1619 while (touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) k++; 1620 min_index = global_indices[k]; 1621 min_loc = k; 1622 /* search the minimum among global nodes already untouched on the cc */ 1623 for (k=1;k<size_of_constraint;k++) { 1624 /* there can be more than one constraint on a single connected component */ 1625 if (min_index > global_indices[k] && !touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) { 1626 min_index = global_indices[k]; 1627 min_loc = k; 1628 } 1629 } 1630 touched[temp_indices_to_constraint_B[temp_indices[i]+min_loc]] = PETSC_TRUE; 1631 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1632 aux_primal_minloc[total_primal_vertices]=min_loc; 1633 total_primal_vertices++; 1634 } 1635 } 1636 /* free workspace */ 1637 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1638 ierr = PetscFree(touched);CHKERRQ(ierr); 1639 /* permute indices in order to have a sorted set of vertices */ 1640 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1641 1642 /* nonzero structure of constraint matrix */ 1643 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1644 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1645 j=total_primal_vertices; 1646 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1647 if (!change_basis[i]) { 1648 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1649 j++; 1650 } 1651 } 1652 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1653 ierr = PetscFree(nnz);CHKERRQ(ierr); 1654 /* set values in constraint matrix */ 1655 for (i=0;i<total_primal_vertices;i++) { 1656 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 total_counts = total_primal_vertices; 1659 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1660 if (!change_basis[i]) { 1661 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1662 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1663 total_counts++; 1664 } 1665 } 1666 /* assembling */ 1667 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 /* 1670 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1671 */ 1672 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1673 if (pcbddc->use_change_of_basis) { 1674 PetscBool qr_needed = PETSC_FALSE; 1675 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1676 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1677 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1678 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1679 /* work arrays */ 1680 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1681 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1682 /* nonzeros per row */ 1683 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1684 if (change_basis[i]) { 1685 qr_needed = PETSC_TRUE; 1686 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1687 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1688 } 1689 } 1690 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1691 ierr = PetscFree(nnz);CHKERRQ(ierr); 1692 /* Set initial identity in the matrix */ 1693 for (i=0;i<pcis->n_B;i++) { 1694 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1695 } 1696 1697 /* Now we loop on the constraints which need a change of basis */ 1698 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1699 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1700 /* Change of basis matrix T computed via QR decomposition of constraints */ 1701 if (qr_needed) { 1702 /* dual and primal dofs on a single cc */ 1703 PetscInt dual_dofs,primal_dofs; 1704 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1705 PetscInt primal_counter; 1706 /* working stuff for GEQRF */ 1707 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1708 PetscBLASInt lqr_work; 1709 /* working stuff for UNGQR */ 1710 PetscScalar *gqr_work,lgqr_work_t; 1711 PetscBLASInt lgqr_work; 1712 /* working stuff for TRTRS */ 1713 PetscScalar *trs_rhs; 1714 PetscBLASInt Blas_NRHS; 1715 /* pointers for values insertion into change of basis matrix */ 1716 PetscInt *start_rows,*start_cols; 1717 PetscScalar *start_vals; 1718 /* working stuff for values insertion */ 1719 PetscBool *is_primal; 1720 1721 /* space to store Q */ 1722 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1723 /* first we issue queries for optimal work */ 1724 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1725 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1726 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1727 lqr_work = -1; 1728 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1729 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1730 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1731 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1732 lgqr_work = -1; 1733 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1734 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1735 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1736 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1737 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1738 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1740 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1741 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1742 /* array to store scaling factors for reflectors */ 1743 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1744 /* array to store rhs and solution of triangular solver */ 1745 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1746 /* array to store whether a node is primal or not */ 1747 ierr = PetscMalloc(pcis->n_B*sizeof(*is_primal),&is_primal);CHKERRQ(ierr); 1748 ierr = PetscMemzero(is_primal,pcis->n_B*sizeof(*is_primal));CHKERRQ(ierr); 1749 for (i=0;i<total_primal_vertices;i++) is_primal[local_to_B[aux_primal_numbering[i]]] = PETSC_TRUE; 1750 1751 /* allocating workspace for check */ 1752 if (pcbddc->dbg_flag) { 1753 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1754 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1755 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1756 } 1757 1758 /* loop on constraints and see whether or not they need a change of basis */ 1759 /* -> using implicit ordering contained in temp_indices data */ 1760 total_counts = pcbddc->n_vertices; 1761 primal_counter = total_counts; 1762 while (total_counts<pcbddc->local_primal_size) { 1763 primal_dofs = 1; 1764 if (change_basis[total_counts]) { 1765 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1766 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1767 primal_dofs++; 1768 } 1769 /* get constraint info */ 1770 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1771 dual_dofs = size_of_constraint-primal_dofs; 1772 1773 /* copy quadrature constraints for change of basis check */ 1774 if (pcbddc->dbg_flag) { 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1776 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1777 } 1778 1779 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1780 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1781 1782 /* compute QR decomposition of constraints */ 1783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1784 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1786 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1787 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1790 1791 /* explictly compute R^-T */ 1792 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1793 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1794 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1797 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1798 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1799 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1800 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1801 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1802 1803 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1805 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1806 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1807 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1808 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1809 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1812 1813 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1814 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1815 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1817 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1818 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1820 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1821 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1822 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1823 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1824 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1825 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1826 1827 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1828 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1829 /* insert cols for primal dofs */ 1830 for (j=0;j<primal_dofs;j++) { 1831 start_vals = &qr_basis[j*size_of_constraint]; 1832 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1833 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1834 } 1835 /* insert cols for dual dofs */ 1836 for (j=0,k=0;j<dual_dofs;k++) { 1837 if (!is_primal[temp_indices_to_constraint_B[temp_indices[total_counts]+k]]) { 1838 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1839 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1840 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1841 j++; 1842 } 1843 } 1844 1845 /* check change of basis */ 1846 if (pcbddc->dbg_flag) { 1847 PetscInt ii,jj; 1848 PetscBool valid_qr=PETSC_TRUE; 1849 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1856 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1858 for (jj=0;jj<size_of_constraint;jj++) { 1859 for (ii=0;ii<primal_dofs;ii++) { 1860 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1861 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1862 } 1863 } 1864 if (!valid_qr) { 1865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1866 for (jj=0;jj<size_of_constraint;jj++) { 1867 for (ii=0;ii<primal_dofs;ii++) { 1868 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1869 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1870 } 1871 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1872 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1873 } 1874 } 1875 } 1876 } else { 1877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1878 } 1879 } 1880 /* increment primal counter */ 1881 primal_counter += primal_dofs; 1882 } else { 1883 if (pcbddc->dbg_flag) { 1884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1885 } 1886 } 1887 /* increment constraint counter total_counts */ 1888 total_counts += primal_dofs; 1889 } 1890 if (pcbddc->dbg_flag) { 1891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1892 ierr = PetscFree(work);CHKERRQ(ierr); 1893 } 1894 /* free workspace */ 1895 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1896 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1897 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1898 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1899 ierr = PetscFree(is_primal);CHKERRQ(ierr); 1900 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1901 } 1902 /* assembling */ 1903 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1904 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1905 /* 1906 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1907 */ 1908 } 1909 /* free workspace */ 1910 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1911 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1912 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1913 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1914 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1915 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1916 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1917 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1923 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1924 { 1925 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1926 PC_IS *pcis = (PC_IS*)pc->data; 1927 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1928 PetscInt bs,ierr,i,vertex_size; 1929 PetscViewer viewer=pcbddc->dbg_viewer; 1930 1931 PetscFunctionBegin; 1932 /* Init local Graph struct */ 1933 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1934 1935 /* Check validity of the csr graph passed in by the user */ 1936 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1937 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1938 } 1939 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1940 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1941 Mat mat_adj; 1942 const PetscInt *xadj,*adjncy; 1943 PetscBool flg_row=PETSC_TRUE; 1944 1945 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1946 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1947 if (!flg_row) { 1948 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1949 } 1950 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1951 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1952 if (!flg_row) { 1953 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1954 } 1955 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1956 } 1957 1958 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1959 vertex_size = 1; 1960 if (!pcbddc->n_ISForDofs) { 1961 IS *custom_ISForDofs; 1962 1963 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1964 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1965 for (i=0;i<bs;i++) { 1966 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1967 } 1968 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1969 /* remove my references to IS objects */ 1970 for (i=0;i<bs;i++) { 1971 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1972 } 1973 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1974 } else { /* mat block size as vertex size (used for elasticity) */ 1975 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1976 } 1977 1978 /* Setup of Graph */ 1979 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1980 1981 /* Graph's connected components analysis */ 1982 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1983 1984 /* print some info to stdout */ 1985 if (pcbddc->dbg_flag) { 1986 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1993 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[]) 1994 { 1995 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1996 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 n = 0; 2001 vertices = 0; 2002 if (pcbddc->ConstraintMatrix) { 2003 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2004 for (i=0;i<local_primal_size;i++) { 2005 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2006 if (size_of_constraint == 1) n++; 2007 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2008 } 2009 if (vertices_idx) { 2010 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2011 n = 0; 2012 for (i=0;i<local_primal_size;i++) { 2013 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2014 if (size_of_constraint == 1) { 2015 vertices[n++]=row_cmat_indices[0]; 2016 } 2017 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2018 } 2019 } 2020 } 2021 *n_vertices = n; 2022 if (vertices_idx) *vertices_idx = vertices; 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2028 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[]) 2029 { 2030 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2031 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2032 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2033 PetscBool *touched; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 n = 0; 2038 constraints_index = 0; 2039 if (pcbddc->ConstraintMatrix) { 2040 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2041 max_size_of_constraint = 0; 2042 for (i=0;i<local_primal_size;i++) { 2043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2044 if (size_of_constraint > 1) { 2045 n++; 2046 } 2047 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2048 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2049 } 2050 if (constraints_idx) { 2051 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2052 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2053 ierr = PetscMalloc(local_size*sizeof(PetscBool),&touched);CHKERRQ(ierr); 2054 ierr = PetscMemzero(touched,local_size*sizeof(PetscBool));CHKERRQ(ierr); 2055 n = 0; 2056 for (i=0;i<local_primal_size;i++) { 2057 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2058 if (size_of_constraint > 1) { 2059 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2060 /* find first untouched local node */ 2061 j = 0; 2062 while(touched[row_cmat_indices[j]]) j++; 2063 min_index = row_cmat_global_indices[j]; 2064 min_loc = j; 2065 /* search the minimum among nodes not yet touched on the connected component 2066 since there can be more than one constraint on a single cc */ 2067 for (j=1;j<size_of_constraint;j++) { 2068 if (min_index > row_cmat_global_indices[j] && !touched[row_cmat_indices[j]]) { 2069 min_index = row_cmat_global_indices[j]; 2070 min_loc = j; 2071 } 2072 } 2073 touched[row_cmat_indices[min_loc]] = PETSC_TRUE; 2074 constraints_index[n++] = row_cmat_indices[min_loc]; 2075 } 2076 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(touched);CHKERRQ(ierr); 2079 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2080 } 2081 } 2082 *n_constraints = n; 2083 if (constraints_idx) *constraints_idx = constraints_index; 2084 PetscFunctionReturn(0); 2085 } 2086 2087 /* the next two functions has been adapted from pcis.c */ 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "PCBDDCApplySchur" 2090 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2091 { 2092 PetscErrorCode ierr; 2093 PC_IS *pcis = (PC_IS*)(pc->data); 2094 2095 PetscFunctionBegin; 2096 if (!vec2_B) { vec2_B = v; } 2097 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2098 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2099 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2100 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2101 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2107 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2108 { 2109 PetscErrorCode ierr; 2110 PC_IS *pcis = (PC_IS*)(pc->data); 2111 2112 PetscFunctionBegin; 2113 if (!vec2_B) { vec2_B = v; } 2114 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2115 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2116 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2117 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2118 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "PCBDDCSubsetNumbering" 2124 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2125 { 2126 Vec local_vec,global_vec; 2127 IS seqis,paris; 2128 VecScatter scatter_ctx; 2129 PetscScalar *array; 2130 PetscInt *temp_global_dofs; 2131 PetscScalar globalsum; 2132 PetscInt i,j,s; 2133 PetscInt nlocals,first_index,old_index,max_local; 2134 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2135 PetscMPIInt *dof_sizes,*dof_displs; 2136 PetscBool first_found; 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 /* mpi buffers */ 2141 MPI_Comm_size(comm,&size_prec_comm); 2142 MPI_Comm_rank(comm,&rank_prec_comm); 2143 j = ( !rank_prec_comm ? size_prec_comm : 0); 2144 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2145 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2146 /* get maximum size of subset */ 2147 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2148 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2149 max_local = 0; 2150 if (n_local_dofs) { 2151 max_local = temp_global_dofs[0]; 2152 for (i=1;i<n_local_dofs;i++) { 2153 if (max_local < temp_global_dofs[i] ) { 2154 max_local = temp_global_dofs[i]; 2155 } 2156 } 2157 } 2158 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2159 max_global++; 2160 max_local = 0; 2161 if (n_local_dofs) { 2162 max_local = local_dofs[0]; 2163 for (i=1;i<n_local_dofs;i++) { 2164 if (max_local < local_dofs[i] ) { 2165 max_local = local_dofs[i]; 2166 } 2167 } 2168 } 2169 max_local++; 2170 /* allocate workspace */ 2171 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2172 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2173 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2174 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2175 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2176 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2177 /* create scatter */ 2178 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2179 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2180 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2181 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2182 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2183 /* init array */ 2184 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2185 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2186 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2187 if (local_dofs_mult) { 2188 for (i=0;i<n_local_dofs;i++) { 2189 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2190 } 2191 } else { 2192 for (i=0;i<n_local_dofs;i++) { 2193 array[local_dofs[i]]=1.0; 2194 } 2195 } 2196 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2197 /* scatter into global vec and get total number of global dofs */ 2198 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2199 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2200 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2201 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2202 /* Fill global_vec with cumulative function for global numbering */ 2203 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2204 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2205 nlocals = 0; 2206 first_index = -1; 2207 first_found = PETSC_FALSE; 2208 for (i=0;i<s;i++) { 2209 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2210 first_found = PETSC_TRUE; 2211 first_index = i; 2212 } 2213 nlocals += (PetscInt)PetscRealPart(array[i]); 2214 } 2215 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2216 if (!rank_prec_comm) { 2217 dof_displs[0]=0; 2218 for (i=1;i<size_prec_comm;i++) { 2219 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2220 } 2221 } 2222 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2223 if (first_found) { 2224 array[first_index] += (PetscScalar)nlocals; 2225 old_index = first_index; 2226 for (i=first_index+1;i<s;i++) { 2227 if (PetscRealPart(array[i]) > 0.0) { 2228 array[i] += array[old_index]; 2229 old_index = i; 2230 } 2231 } 2232 } 2233 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2234 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2235 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2236 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2237 /* get global ordering of local dofs */ 2238 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2239 if (local_dofs_mult) { 2240 for (i=0;i<n_local_dofs;i++) { 2241 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2242 } 2243 } else { 2244 for (i=0;i<n_local_dofs;i++) { 2245 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2246 } 2247 } 2248 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2249 /* free workspace */ 2250 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2251 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2252 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2253 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2254 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2255 /* return pointer to global ordering of local dofs */ 2256 *global_numbering_subset = temp_global_dofs; 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2262 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2263 { 2264 PetscInt i,j; 2265 PetscScalar *alphas; 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 /* this implements stabilized Gram-Schmidt */ 2270 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2271 for (i=0;i<n;i++) { 2272 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2273 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2274 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2275 } 2276 ierr = PetscFree(alphas);CHKERRQ(ierr); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 /* BDDC requires metis 5.0.1 for multilevel */ 2281 #if defined(PETSC_HAVE_METIS) 2282 #include "metis.h" 2283 #define MetisInt idx_t 2284 #define MetisScalar real_t 2285 #endif 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment" 2289 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2290 { 2291 2292 2293 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2294 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2295 PC_IS *pcis = (PC_IS*)pc->data; 2296 MPI_Comm prec_comm; 2297 MPI_Comm coarse_comm; 2298 2299 MatNullSpace CoarseNullSpace; 2300 2301 /* common to all choiches */ 2302 PetscScalar *temp_coarse_mat_vals; 2303 PetscScalar *ins_coarse_mat_vals; 2304 PetscInt *ins_local_primal_indices; 2305 PetscMPIInt *localsizes2,*localdispl2; 2306 PetscMPIInt size_prec_comm; 2307 PetscMPIInt rank_prec_comm; 2308 PetscMPIInt active_rank=MPI_PROC_NULL; 2309 PetscMPIInt master_proc=0; 2310 PetscInt ins_local_primal_size; 2311 /* specific to MULTILEVEL_BDDC */ 2312 PetscMPIInt *ranks_recv=0; 2313 PetscMPIInt count_recv=0; 2314 PetscMPIInt rank_coarse_proc_send_to=-1; 2315 PetscMPIInt coarse_color = MPI_UNDEFINED; 2316 ISLocalToGlobalMapping coarse_ISLG; 2317 /* some other variables */ 2318 PetscErrorCode ierr; 2319 MatType coarse_mat_type; 2320 PCType coarse_pc_type; 2321 KSPType coarse_ksp_type; 2322 PC pc_temp; 2323 PetscInt i,j,k; 2324 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2325 /* verbose output viewer */ 2326 PetscViewer viewer=pcbddc->dbg_viewer; 2327 PetscInt dbg_flag=pcbddc->dbg_flag; 2328 2329 PetscInt offset,offset2; 2330 PetscMPIInt im_active,active_procs; 2331 PetscInt *dnz,*onz; 2332 2333 PetscBool setsym,issym=PETSC_FALSE; 2334 2335 PetscFunctionBegin; 2336 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2337 ins_local_primal_indices = 0; 2338 ins_coarse_mat_vals = 0; 2339 localsizes2 = 0; 2340 localdispl2 = 0; 2341 temp_coarse_mat_vals = 0; 2342 coarse_ISLG = 0; 2343 2344 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2345 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2346 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2347 2348 /* Assign global numbering to coarse dofs */ 2349 { 2350 PetscInt *auxlocal_primal,*aux_idx; 2351 PetscMPIInt mpi_local_primal_size; 2352 PetscScalar coarsesum,*array; 2353 2354 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2355 2356 /* Construct needed data structures for message passing */ 2357 j = 0; 2358 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2359 j = size_prec_comm; 2360 } 2361 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2362 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2363 /* Gather local_primal_size information for all processes */ 2364 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2365 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2366 } else { 2367 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2368 } 2369 pcbddc->replicated_primal_size = 0; 2370 for (i=0; i<j; i++) { 2371 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2372 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2373 } 2374 2375 /* First let's count coarse dofs. 2376 This code fragment assumes that the number of local constraints per connected component 2377 is not greater than the number of nodes defined for the connected component 2378 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2379 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2380 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2381 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2382 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2383 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2384 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2385 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2386 /* Compute number of coarse dofs */ 2387 ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&pcbddc->coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr); 2388 2389 if (dbg_flag) { 2390 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2391 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2392 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 2393 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2394 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2395 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 2396 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2397 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2398 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2399 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2400 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2401 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2402 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2403 for (i=0;i<pcis->n;i++) { 2404 if (array[i] == 1.0) { 2405 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 2406 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 2407 } 2408 } 2409 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2410 for (i=0;i<pcis->n;i++) { 2411 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2412 } 2413 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2414 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2415 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2416 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2417 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2418 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 2419 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2420 } 2421 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2422 } 2423 2424 if (dbg_flag) { 2425 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 2426 if (dbg_flag > 1) { 2427 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2428 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2429 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2430 for (i=0;i<pcbddc->local_primal_size;i++) { 2431 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 2432 } 2433 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2434 } 2435 } 2436 2437 im_active = 0; 2438 if (pcis->n) im_active = 1; 2439 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2440 2441 /* adapt coarse problem type */ 2442 #if defined(PETSC_HAVE_METIS) 2443 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2444 if (pcbddc->current_level < pcbddc->max_levels) { 2445 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2446 if (dbg_flag) { 2447 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2448 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2449 } 2450 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2451 } 2452 } else { 2453 if (dbg_flag) { 2454 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2455 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2456 } 2457 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2458 } 2459 } 2460 #else 2461 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2462 #endif 2463 2464 switch(pcbddc->coarse_problem_type){ 2465 2466 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2467 { 2468 #if defined(PETSC_HAVE_METIS) 2469 /* we need additional variables */ 2470 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2471 MetisInt *metis_coarse_subdivision; 2472 MetisInt options[METIS_NOPTIONS]; 2473 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2474 PetscMPIInt procs_jumps_coarse_comm; 2475 PetscMPIInt *coarse_subdivision; 2476 PetscMPIInt *total_count_recv; 2477 PetscMPIInt *total_ranks_recv; 2478 PetscMPIInt *displacements_recv; 2479 PetscMPIInt *my_faces_connectivity; 2480 PetscMPIInt *petsc_faces_adjncy; 2481 MetisInt *faces_adjncy; 2482 MetisInt *faces_xadj; 2483 PetscMPIInt *number_of_faces; 2484 PetscMPIInt *faces_displacements; 2485 PetscInt *array_int; 2486 PetscMPIInt my_faces=0; 2487 PetscMPIInt total_faces=0; 2488 PetscInt ranks_stretching_ratio; 2489 2490 /* define some quantities */ 2491 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2492 coarse_mat_type = MATIS; 2493 coarse_pc_type = PCBDDC; 2494 coarse_ksp_type = KSPRICHARDSON; 2495 2496 /* details of coarse decomposition */ 2497 n_subdomains = active_procs; 2498 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2499 ranks_stretching_ratio = size_prec_comm/active_procs; 2500 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2501 2502 #if 0 2503 PetscMPIInt *old_ranks; 2504 PetscInt *new_ranks,*jj,*ii; 2505 MatPartitioning mat_part; 2506 IS coarse_new_decomposition,is_numbering; 2507 PetscViewer viewer_test; 2508 MPI_Comm test_coarse_comm; 2509 PetscMPIInt test_coarse_color; 2510 Mat mat_adj; 2511 /* Create new communicator for coarse problem splitting the old one */ 2512 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2513 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2514 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2515 test_coarse_comm = MPI_COMM_NULL; 2516 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2517 if (im_active) { 2518 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2519 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2520 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2521 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2522 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2523 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2524 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2525 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2526 k = pcis->n_neigh-1; 2527 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2528 ii[0]=0; 2529 ii[1]=k; 2530 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2531 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2532 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2533 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2534 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2535 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2536 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2537 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2538 printf("Setting Nparts %d\n",n_parts); 2539 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2540 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2541 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2542 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2543 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2544 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2545 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2546 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2547 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2548 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2549 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2550 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2551 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2552 } 2553 #endif 2554 2555 /* build CSR graph of subdomains' connectivity */ 2556 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2557 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2558 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2559 for (j=0;j<pcis->n_shared[i];j++){ 2560 array_int[ pcis->shared[i][j] ]+=1; 2561 } 2562 } 2563 for (i=1;i<pcis->n_neigh;i++){ 2564 for (j=0;j<pcis->n_shared[i];j++){ 2565 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2566 my_faces++; 2567 break; 2568 } 2569 } 2570 } 2571 2572 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2573 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2574 my_faces=0; 2575 for (i=1;i<pcis->n_neigh;i++){ 2576 for (j=0;j<pcis->n_shared[i];j++){ 2577 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2578 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2579 my_faces++; 2580 break; 2581 } 2582 } 2583 } 2584 if (rank_prec_comm == master_proc) { 2585 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2586 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2587 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2588 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2589 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2590 } 2591 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2592 if (rank_prec_comm == master_proc) { 2593 faces_xadj[0]=0; 2594 faces_displacements[0]=0; 2595 j=0; 2596 for (i=1;i<size_prec_comm+1;i++) { 2597 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2598 if (number_of_faces[i-1]) { 2599 j++; 2600 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2601 } 2602 } 2603 } 2604 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2605 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2606 ierr = PetscFree(array_int);CHKERRQ(ierr); 2607 if (rank_prec_comm == master_proc) { 2608 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2609 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2610 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2611 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2612 } 2613 2614 if ( rank_prec_comm == master_proc ) { 2615 2616 PetscInt heuristic_for_metis=3; 2617 2618 ncon=1; 2619 faces_nvtxs=n_subdomains; 2620 /* partition graoh induced by face connectivity */ 2621 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2622 ierr = METIS_SetDefaultOptions(options); 2623 /* we need a contiguous partition of the coarse mesh */ 2624 options[METIS_OPTION_CONTIG]=1; 2625 options[METIS_OPTION_NITER]=30; 2626 if (pcbddc->coarsening_ratio > 1) { 2627 if (n_subdomains>n_parts*heuristic_for_metis) { 2628 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2629 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2630 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2631 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2632 } else { 2633 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2634 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2635 } 2636 } else { 2637 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2638 } 2639 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2640 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2641 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2642 2643 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2644 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2645 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2646 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2647 } 2648 2649 /* Create new communicator for coarse problem splitting the old one */ 2650 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2651 coarse_color=0; /* for communicator splitting */ 2652 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2653 } 2654 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2655 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2656 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2657 2658 if ( coarse_color == 0 ) { 2659 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2660 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2661 } else { 2662 rank_coarse_comm = MPI_PROC_NULL; 2663 } 2664 2665 /* master proc take care of arranging and distributing coarse information */ 2666 if (rank_coarse_comm == master_proc) { 2667 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2668 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2669 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2670 /* some initializations */ 2671 displacements_recv[0]=0; 2672 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2673 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2674 for (j=0;j<size_coarse_comm;j++) { 2675 for (i=0;i<size_prec_comm;i++) { 2676 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2677 } 2678 } 2679 /* displacements needed for scatterv of total_ranks_recv */ 2680 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2681 2682 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2683 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2684 for (j=0;j<size_coarse_comm;j++) { 2685 for (i=0;i<size_prec_comm;i++) { 2686 if (coarse_subdivision[i]==j) { 2687 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2688 total_count_recv[j]+=1; 2689 } 2690 } 2691 } 2692 /*for (j=0;j<size_coarse_comm;j++) { 2693 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2694 for (i=0;i<total_count_recv[j];i++) { 2695 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2696 } 2697 printf("\n"); 2698 }*/ 2699 2700 /* identify new decomposition in terms of ranks in the old communicator */ 2701 for (i=0;i<n_subdomains;i++) { 2702 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2703 } 2704 /*printf("coarse_subdivision in old end new ranks\n"); 2705 for (i=0;i<size_prec_comm;i++) 2706 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2707 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2708 } else { 2709 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2710 } 2711 printf("\n");*/ 2712 } 2713 2714 /* Scatter new decomposition for send details */ 2715 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2716 /* Scatter receiving details to members of coarse decomposition */ 2717 if ( coarse_color == 0) { 2718 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2719 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2720 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2721 } 2722 2723 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2724 if (coarse_color == 0) { 2725 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2726 for (i=0;i<count_recv;i++) 2727 printf("%d ",ranks_recv[i]); 2728 printf("\n"); 2729 }*/ 2730 2731 if (rank_prec_comm == master_proc) { 2732 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2733 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2734 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2735 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2736 } 2737 #endif 2738 break; 2739 } 2740 2741 case(REPLICATED_BDDC): 2742 2743 pcbddc->coarse_communications_type = GATHERS_BDDC; 2744 coarse_mat_type = MATSEQAIJ; 2745 coarse_pc_type = PCLU; 2746 coarse_ksp_type = KSPPREONLY; 2747 coarse_comm = PETSC_COMM_SELF; 2748 active_rank = rank_prec_comm; 2749 break; 2750 2751 case(PARALLEL_BDDC): 2752 2753 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2754 coarse_mat_type = MATAIJ; 2755 coarse_pc_type = PCREDUNDANT; 2756 coarse_ksp_type = KSPPREONLY; 2757 coarse_comm = prec_comm; 2758 active_rank = rank_prec_comm; 2759 break; 2760 2761 case(SEQUENTIAL_BDDC): 2762 pcbddc->coarse_communications_type = GATHERS_BDDC; 2763 coarse_mat_type = MATAIJ; 2764 coarse_pc_type = PCLU; 2765 coarse_ksp_type = KSPPREONLY; 2766 coarse_comm = PETSC_COMM_SELF; 2767 active_rank = master_proc; 2768 break; 2769 } 2770 2771 switch(pcbddc->coarse_communications_type){ 2772 2773 case(SCATTERS_BDDC): 2774 { 2775 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2776 2777 IS coarse_IS; 2778 2779 if(pcbddc->coarsening_ratio == 1) { 2780 ins_local_primal_size = pcbddc->local_primal_size; 2781 ins_local_primal_indices = pcbddc->local_primal_indices; 2782 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2783 /* nonzeros */ 2784 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2785 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2786 for (i=0;i<ins_local_primal_size;i++) { 2787 dnz[i] = ins_local_primal_size; 2788 } 2789 } else { 2790 PetscMPIInt send_size; 2791 PetscMPIInt *send_buffer; 2792 PetscInt *aux_ins_indices; 2793 PetscInt ii,jj; 2794 MPI_Request *requests; 2795 2796 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2797 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 2798 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 2799 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2800 pcbddc->replicated_primal_size = count_recv; 2801 j = 0; 2802 for (i=0;i<count_recv;i++) { 2803 pcbddc->local_primal_displacements[i] = j; 2804 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 2805 } 2806 pcbddc->local_primal_displacements[count_recv] = j; 2807 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2808 /* allocate auxiliary space */ 2809 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2810 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2811 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2812 /* allocate stuffs for message massing */ 2813 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2814 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2815 /* send indices to be inserted */ 2816 for (i=0;i<count_recv;i++) { 2817 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 2818 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2819 } 2820 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2821 send_size = pcbddc->local_primal_size; 2822 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2823 for (i=0;i<send_size;i++) { 2824 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2825 } 2826 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2827 } 2828 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2829 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2830 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2831 } 2832 j = 0; 2833 for (i=0;i<count_recv;i++) { 2834 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 2835 localsizes2[i] = ii*ii; 2836 localdispl2[i] = j; 2837 j += localsizes2[i]; 2838 jj = pcbddc->local_primal_displacements[i]; 2839 /* it counts the coarse subdomains sharing the coarse node */ 2840 for (k=0;k<ii;k++) { 2841 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 2842 } 2843 } 2844 /* temp_coarse_mat_vals used to store matrix values to be received */ 2845 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2846 /* evaluate how many values I will insert in coarse mat */ 2847 ins_local_primal_size = 0; 2848 for (i=0;i<pcbddc->coarse_size;i++) { 2849 if (aux_ins_indices[i]) { 2850 ins_local_primal_size++; 2851 } 2852 } 2853 /* evaluate indices I will insert in coarse mat */ 2854 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2855 j = 0; 2856 for(i=0;i<pcbddc->coarse_size;i++) { 2857 if(aux_ins_indices[i]) { 2858 ins_local_primal_indices[j] = i; 2859 j++; 2860 } 2861 } 2862 /* processes partecipating in coarse problem receive matrix data from their friends */ 2863 for (i=0;i<count_recv;i++) { 2864 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2865 } 2866 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2867 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2868 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2869 } 2870 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2871 /* nonzeros */ 2872 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2873 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2874 /* use aux_ins_indices to realize a global to local mapping */ 2875 j=0; 2876 for(i=0;i<pcbddc->coarse_size;i++){ 2877 if(aux_ins_indices[i]==0){ 2878 aux_ins_indices[i]=-1; 2879 } else { 2880 aux_ins_indices[i]=j; 2881 j++; 2882 } 2883 } 2884 for (i=0;i<count_recv;i++) { 2885 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 2886 for (k=0;k<j;k++) { 2887 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 2888 } 2889 } 2890 /* check */ 2891 for (i=0;i<ins_local_primal_size;i++) { 2892 if (dnz[i] > ins_local_primal_size) { 2893 dnz[i] = ins_local_primal_size; 2894 } 2895 } 2896 ierr = PetscFree(requests);CHKERRQ(ierr); 2897 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2898 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2899 } 2900 /* create local to global mapping needed by coarse MATIS */ 2901 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2902 coarse_comm = prec_comm; 2903 active_rank = rank_prec_comm; 2904 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2905 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2906 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2907 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2908 /* arrays for values insertion */ 2909 ins_local_primal_size = pcbddc->local_primal_size; 2910 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2911 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2912 for (j=0;j<ins_local_primal_size;j++){ 2913 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2914 for (i=0;i<ins_local_primal_size;i++) { 2915 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2916 } 2917 } 2918 } 2919 break; 2920 2921 } 2922 2923 case(GATHERS_BDDC): 2924 { 2925 2926 PetscMPIInt mysize,mysize2; 2927 PetscMPIInt *send_buffer; 2928 2929 if (rank_prec_comm==active_rank) { 2930 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2931 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 2932 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2933 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2934 /* arrays for values insertion */ 2935 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2936 localdispl2[0]=0; 2937 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2938 j=0; 2939 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2940 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2941 } 2942 2943 mysize=pcbddc->local_primal_size; 2944 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2945 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2946 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 2947 2948 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2949 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2950 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2951 } else { 2952 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2953 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2954 } 2955 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2956 break; 2957 }/* switch on coarse problem and communications associated with finished */ 2958 } 2959 2960 /* Now create and fill up coarse matrix */ 2961 if ( rank_prec_comm == active_rank ) { 2962 2963 Mat matis_coarse_local_mat; 2964 2965 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2966 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2967 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2968 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2969 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2970 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2971 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2972 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2973 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2974 } else { 2975 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2976 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2977 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2978 ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr); 2979 ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr); 2980 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2981 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2982 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2983 } 2984 /* preallocation */ 2985 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2986 2987 PetscInt lrows,lcols,bs; 2988 2989 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2990 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2991 ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr); 2992 2993 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2994 2995 Vec vec_dnz,vec_onz; 2996 PetscScalar *my_dnz,*my_onz,*array; 2997 PetscInt *mat_ranges,*row_ownership; 2998 PetscInt coarse_index_row,coarse_index_col,owner; 2999 3000 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 3001 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 3002 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr); 3003 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 3004 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 3005 3006 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 3007 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 3008 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3009 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 3010 3011 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 3012 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 3013 for (i=0;i<size_prec_comm;i++) { 3014 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 3015 row_ownership[j]=i; 3016 } 3017 } 3018 3019 for (i=0;i<pcbddc->local_primal_size;i++) { 3020 coarse_index_row = pcbddc->local_primal_indices[i]; 3021 owner = row_ownership[coarse_index_row]; 3022 for (j=i;j<pcbddc->local_primal_size;j++) { 3023 owner = row_ownership[coarse_index_row]; 3024 coarse_index_col = pcbddc->local_primal_indices[j]; 3025 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3026 my_dnz[i] += 1.0; 3027 } else { 3028 my_onz[i] += 1.0; 3029 } 3030 if (i != j) { 3031 owner = row_ownership[coarse_index_col]; 3032 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3033 my_dnz[j] += 1.0; 3034 } else { 3035 my_onz[j] += 1.0; 3036 } 3037 } 3038 } 3039 } 3040 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3041 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3042 if (pcbddc->local_primal_size) { 3043 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3044 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3045 } 3046 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3047 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3048 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3049 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3050 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3051 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3052 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3053 3054 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3055 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3056 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3057 3058 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3059 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3060 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3061 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3062 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3063 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3064 } else { 3065 for (k=0;k<size_prec_comm;k++){ 3066 offset=pcbddc->local_primal_displacements[k]; 3067 offset2=localdispl2[k]; 3068 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3069 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3070 for (j=0;j<ins_local_primal_size;j++){ 3071 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3072 } 3073 for (j=0;j<ins_local_primal_size;j++) { 3074 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 3075 } 3076 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3077 } 3078 } 3079 3080 /* check */ 3081 for (i=0;i<lrows;i++) { 3082 if (dnz[i]>lcols) dnz[i]=lcols; 3083 if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols; 3084 } 3085 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 3086 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3087 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3088 } else { 3089 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3090 ierr = PetscFree(dnz);CHKERRQ(ierr); 3091 } 3092 /* insert values */ 3093 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3094 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3095 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3096 if (pcbddc->coarsening_ratio == 1) { 3097 ins_coarse_mat_vals = coarse_submat_vals; 3098 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3101 for (k=0;k<pcbddc->replicated_primal_size;k++) { 3102 offset = pcbddc->local_primal_displacements[k]; 3103 offset2 = localdispl2[k]; 3104 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 3105 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3106 for (j=0;j<ins_local_primal_size;j++){ 3107 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3108 } 3109 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3110 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3111 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3112 } 3113 } 3114 ins_local_primal_indices = 0; 3115 ins_coarse_mat_vals = 0; 3116 } else { 3117 for (k=0;k<size_prec_comm;k++){ 3118 offset=pcbddc->local_primal_displacements[k]; 3119 offset2=localdispl2[k]; 3120 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 3121 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3122 for (j=0;j<ins_local_primal_size;j++){ 3123 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 3124 } 3125 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3126 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3127 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3128 } 3129 ins_local_primal_indices = 0; 3130 ins_coarse_mat_vals = 0; 3131 } 3132 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 /* symmetry of coarse matrix */ 3135 if (issym) { 3136 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3137 } 3138 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3139 } 3140 3141 /* create loc to glob scatters if needed */ 3142 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3143 IS local_IS,global_IS; 3144 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3145 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3146 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3147 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3148 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3149 } 3150 3151 /* free memory no longer needed */ 3152 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3153 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3154 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3155 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3156 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3157 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3158 3159 /* Compute coarse null space */ 3160 CoarseNullSpace = 0; 3161 if (pcbddc->NullSpace) { 3162 ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr); 3163 } 3164 3165 /* KSP for coarse problem */ 3166 if (rank_prec_comm == active_rank) { 3167 PetscBool isbddc=PETSC_FALSE; 3168 3169 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3170 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3171 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3172 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3173 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3174 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3175 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3176 /* Allow user's customization */ 3177 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3178 /* Set Up PC for coarse problem BDDC */ 3179 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3180 i = pcbddc->current_level+1; 3181 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3182 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3183 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3184 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3185 if (CoarseNullSpace) { 3186 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3187 } 3188 if (dbg_flag) { 3189 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3191 } 3192 } else { 3193 if (CoarseNullSpace) { 3194 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3195 } 3196 } 3197 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3198 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3199 3200 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3201 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3202 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3203 if (j == 1) { 3204 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3205 if (isbddc) { 3206 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3207 } 3208 } 3209 } 3210 /* Check coarse problem if requested */ 3211 if ( dbg_flag && rank_prec_comm == active_rank ) { 3212 KSP check_ksp; 3213 PC check_pc; 3214 Vec check_vec; 3215 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3216 KSPType check_ksp_type; 3217 3218 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3219 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3220 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3221 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3222 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3223 if (issym) check_ksp_type = KSPCG; 3224 else check_ksp_type = KSPGMRES; 3225 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3226 } else { 3227 check_ksp_type = KSPPREONLY; 3228 } 3229 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3230 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3231 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3232 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3233 /* create random vec */ 3234 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3235 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3236 if (CoarseNullSpace) { 3237 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3238 } 3239 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3240 /* solve coarse problem */ 3241 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3242 if (CoarseNullSpace) { 3243 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3244 } 3245 /* check coarse problem residual error */ 3246 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3247 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3248 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3249 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3250 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3251 /* get eigenvalue estimation if inexact */ 3252 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3253 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3254 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3255 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3256 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3257 } 3258 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3259 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3260 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3261 } 3262 if (dbg_flag) { 3263 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3264 } 3265 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3266 3267 PetscFunctionReturn(0); 3268 } 3269 3270