1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 110 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 113 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 114 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCBDDCCreateWorkVectors" 122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 123 { 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PC_IS *pcis = (PC_IS*)pc->data; 126 VecType impVecType; 127 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 132 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 133 local_primal_size = n_constraints+n_vertices; 134 n_R = pcis->n-n_vertices; 135 /* local work vectors */ 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 138 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 139 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 140 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 141 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 145 if (n_constraints) { 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 149 } 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 156 { 157 PetscErrorCode ierr; 158 /* pointers to pcis and pcbddc */ 159 PC_IS* pcis = (PC_IS*)pc->data; 160 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 161 /* submatrices of local problem */ 162 Mat A_RV,A_VR,A_VV; 163 /* working matrices */ 164 Mat M1,M2,M3,C_CR; 165 /* working vectors */ 166 Vec vec1_C,vec2_C,vec1_V,vec2_V; 167 /* additional working stuff */ 168 IS is_aux; 169 ISLocalToGlobalMapping BtoNmap; 170 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 171 const PetscScalar *array,*row_cmat_values; 172 const PetscInt *row_cmat_indices,*idx_R_local; 173 PetscInt *vertices,*idx_V_B,*auxindices; 174 PetscInt n_vertices,n_constraints,size_of_constraint; 175 PetscInt i,j,n_R,n_D,n_B; 176 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 177 /* Vector and matrix types */ 178 VecType impVecType; 179 MatType impMatType; 180 /* some shortcuts to scalars */ 181 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 182 /* for debugging purposes */ 183 PetscReal *coarsefunctions_errors,*constraints_errors; 184 185 PetscFunctionBegin; 186 /* get number of vertices and their local indices */ 187 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 188 n_constraints = pcbddc->local_primal_size-n_vertices; 189 /* Set Non-overlapping dimensions */ 190 n_B = pcis->n_B; n_D = pcis->n - n_B; 191 n_R = pcis->n-n_vertices; 192 193 /* Set types for local objects needed by BDDC precondtioner */ 194 impMatType = MATSEQDENSE; 195 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 196 197 /* Allocating some extra storage just to be safe */ 198 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 199 for (i=0;i<pcis->n;i++) auxindices[i]=i; 200 201 /* vertices in boundary numbering */ 202 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 204 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 206 if (i != n_vertices) { 207 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 208 } 209 210 /* some work vectors on vertices and/or constraints */ 211 if (n_vertices) { 212 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 213 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 214 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 215 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 216 } 217 if (n_constraints) { 218 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 219 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 220 } 221 222 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 223 if (n_constraints) { 224 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 225 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 226 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 227 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 228 229 /* Extract constraints on R nodes: C_{CR} */ 230 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 231 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 232 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 233 234 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 235 for (i=0;i<n_constraints;i++) { 236 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 237 /* Get row of constraint matrix in R numbering */ 238 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 239 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 240 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 241 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 242 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 243 /* Solve for row of constraint matrix in R numbering */ 244 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 245 /* Set values in local_auxmat2 */ 246 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 247 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 248 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 249 } 250 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 253 254 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 255 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 256 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 257 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 258 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 259 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 260 ierr = MatSetUp(M1);CHKERRQ(ierr); 261 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 262 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 263 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 264 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 266 ierr = MatDestroy(&M2);CHKERRQ(ierr); 267 ierr = MatDestroy(&M3);CHKERRQ(ierr); 268 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 269 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 270 } 271 272 /* Get submatrices from subdomain matrix */ 273 if (n_vertices) { 274 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 275 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 277 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 278 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 279 } 280 281 /* Matrix of coarse basis functions (local) */ 282 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 283 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 284 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 285 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 286 if (pcbddc->switch_static || pcbddc->dbg_flag) { 287 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 288 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 289 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 290 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 291 } 292 293 if (pcbddc->dbg_flag) { 294 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 295 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 296 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 297 } 298 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 299 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 300 301 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 302 303 /* vertices */ 304 for (i=0;i<n_vertices;i++) { 305 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 306 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 307 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 308 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 309 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 310 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 311 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 313 if (n_constraints) { 314 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 315 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 316 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 317 } 318 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 319 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 320 321 /* Set values in coarse basis function and subdomain part of coarse_mat */ 322 /* coarse basis functions */ 323 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 324 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 325 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 326 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 327 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 328 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 329 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 330 if (pcbddc->switch_static || pcbddc->dbg_flag) { 331 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 333 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 334 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 336 } 337 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 338 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 339 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 341 if (n_constraints) { 342 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 343 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 345 } 346 347 /* check */ 348 if (pcbddc->dbg_flag) { 349 /* assemble subdomain vector on local nodes */ 350 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 351 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 352 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 354 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 355 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 356 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 357 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 358 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 359 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 360 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 362 if (n_constraints) { 363 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 364 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 365 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 366 } 367 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 368 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 369 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 370 /* check saddle point solution */ 371 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 372 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 373 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 374 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 375 /* shift by the identity matrix */ 376 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 380 } 381 } 382 383 /* constraints */ 384 for (i=0;i<n_constraints;i++) { 385 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 386 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 387 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 388 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 389 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 390 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 391 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 392 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 393 if (n_vertices) { 394 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 395 } 396 /* Set values in coarse basis function and subdomain part of coarse_mat */ 397 /* coarse basis functions */ 398 j = i+n_vertices; /* don't touch this! */ 399 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 400 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 405 if (pcbddc->switch_static || pcbddc->dbg_flag) { 406 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 411 } 412 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 413 if (n_vertices) { 414 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 417 } 418 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 421 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 430 /* assemble subdomain vector of lagrange multipliers */ 431 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 432 if (n_vertices) { 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 } 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 443 /* check saddle point solution */ 444 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 447 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 448 /* shift by the identity matrix */ 449 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 450 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 453 } 454 } 455 /* call assembling routines for local coarse basis */ 456 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 if (pcbddc->switch_static || pcbddc->dbg_flag) { 459 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 460 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 461 } 462 463 /* compute other basis functions for non-symmetric problems */ 464 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 465 if (!setsym || (setsym && !issym)) { 466 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 467 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 468 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 469 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 470 if (pcbddc->switch_static || pcbddc->dbg_flag) { 471 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 472 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 473 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 474 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 475 } 476 for (i=0;i<pcbddc->local_primal_size;i++) { 477 if (n_constraints) { 478 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 479 for (j=0;j<n_constraints;j++) { 480 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 481 } 482 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 483 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 484 } 485 if (i<n_vertices) { 486 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 487 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 488 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 489 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 490 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 491 if (n_constraints) { 492 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 493 } 494 } else { 495 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 496 } 497 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 498 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 502 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 504 if (i<n_vertices) { 505 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 506 } 507 if (pcbddc->switch_static || pcbddc->dbg_flag) { 508 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 513 } 514 515 if (pcbddc->dbg_flag) { 516 /* assemble subdomain vector on nodes */ 517 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 518 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 519 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 520 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 521 if (i<n_vertices) { 522 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 523 } 524 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 526 /* assemble subdomain vector of lagrange multipliers */ 527 for (j=0;j<pcbddc->local_primal_size;j++) { 528 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 532 /* check saddle point solution */ 533 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 534 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 535 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 536 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 537 /* shift by the identity matrix */ 538 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 542 } 543 } 544 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 if (pcbddc->switch_static || pcbddc->dbg_flag) { 547 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 } 550 } 551 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 552 /* Checking coarse_sub_mat and coarse basis functios */ 553 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 554 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 555 if (pcbddc->dbg_flag) { 556 Mat coarse_sub_mat; 557 Mat AUXMAT,TM1,TM2,TM3,TM4; 558 Mat coarse_phi_D,coarse_phi_B; 559 Mat coarse_psi_D,coarse_psi_B; 560 Mat A_II,A_BB,A_IB,A_BI; 561 MatType checkmattype=MATSEQAIJ; 562 PetscReal real_value; 563 564 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 565 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 566 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 567 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 569 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 570 if (pcbddc->coarse_psi_B) { 571 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 572 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 573 } 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 575 576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 579 if (pcbddc->coarse_psi_B) { 580 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 581 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 582 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 583 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 584 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 585 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 586 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 } else { 593 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 594 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 595 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 } 602 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 604 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 605 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 606 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 607 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 612 for (i=0;i<pcbddc->local_primal_size;i++) { 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 614 } 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 616 for (i=0;i<pcbddc->local_primal_size;i++) { 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 618 } 619 if (pcbddc->coarse_psi_B) { 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 621 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 623 } 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 625 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 627 } 628 } 629 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 630 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 631 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 632 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 633 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 634 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 635 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 636 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 637 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 639 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 640 if (pcbddc->coarse_psi_B) { 641 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 642 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 643 } 644 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 646 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 647 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 648 } 649 /* free memory */ 650 if (n_vertices) { 651 ierr = PetscFree(vertices);CHKERRQ(ierr); 652 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 653 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 654 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 655 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 656 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 657 } 658 if (n_constraints) { 659 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 660 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 661 ierr = MatDestroy(&M1);CHKERRQ(ierr); 662 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 663 } 664 ierr = PetscFree(auxindices);CHKERRQ(ierr); 665 /* get back data */ 666 *coarse_submat_vals_n = coarse_submat_vals; 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 673 { 674 PC_IS* pcis = (PC_IS*)(pc->data); 675 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 676 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 677 /* manage repeated solves */ 678 MatReuse reuse; 679 MatStructure matstruct; 680 PetscErrorCode ierr; 681 682 PetscFunctionBegin; 683 /* get mat flags */ 684 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 685 reuse = MAT_INITIAL_MATRIX; 686 if (pc->setupcalled) { 687 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 688 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 689 if (matstruct == SAME_NONZERO_PATTERN) { 690 reuse = MAT_REUSE_MATRIX; 691 } else { 692 reuse = MAT_INITIAL_MATRIX; 693 } 694 } 695 if (reuse == MAT_INITIAL_MATRIX) { 696 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 701 } 702 703 /* transform local matrices if needed */ 704 if (pcbddc->use_change_of_basis) { 705 Mat change_mat_all; 706 PetscScalar *row_cmat_values; 707 PetscInt *row_cmat_indices; 708 PetscInt *nnz,*is_indices,*temp_indices; 709 PetscInt i,j,k,n_D,n_B; 710 711 /* Get Non-overlapping dimensions */ 712 n_B = pcis->n_B; 713 n_D = pcis->n-n_B; 714 715 /* compute nonzero structure of change of basis on all local nodes */ 716 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 717 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 718 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 719 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 720 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 721 k=1; 722 for (i=0;i<n_B;i++) { 723 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 724 nnz[is_indices[i]]=j; 725 if (k < j) k = j; 726 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 727 } 728 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 /* assemble change of basis matrix on the whole set of local dofs */ 730 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 731 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 732 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 733 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 734 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 736 for (i=0;i<n_D;i++) { 737 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 738 } 739 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 743 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 744 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 746 } 747 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 748 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 749 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 750 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 751 if (i==1) { 752 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 753 } else { 754 Mat work_mat; 755 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 756 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 757 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 758 } 759 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 760 ierr = PetscFree(nnz);CHKERRQ(ierr); 761 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 762 } else { 763 /* without change of basis, the local matrix is unchanged */ 764 if (!pcbddc->local_mat) { 765 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 766 pcbddc->local_mat = matis->A; 767 } 768 } 769 770 /* get submatrices */ 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 781 { 782 PC_IS* pcis = (PC_IS*)(pc->data); 783 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 784 IS is_aux1,is_aux2; 785 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 786 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 787 PetscBT bitmask; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* Set Non-overlapping dimensions */ 792 n_B = pcis->n_B; n_D = pcis->n - n_B; 793 /* get vertex indices from constraint matrix */ 794 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 795 /* Set number of constraints */ 796 n_constraints = pcbddc->local_primal_size-n_vertices; 797 /* create auxiliary bitmask */ 798 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 801 } 802 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 803 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 804 for (i=0, n_R=0; i<pcis->n; i++) { 805 if (!PetscBTLookup(bitmask,i)) { 806 idx_R_local[n_R] = i; 807 n_R++; 808 } 809 } 810 ierr = PetscFree(vertices);CHKERRQ(ierr); 811 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 812 813 /* print some info if requested */ 814 if (pcbddc->dbg_flag) { 815 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 821 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 822 } 823 824 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 825 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 826 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 827 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 828 for (i=0; i<n_D; i++) { 829 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 830 } 831 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 832 for (i=0, j=0; i<n_R; i++) { 833 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 834 aux_array1[j++] = i; 835 } 836 } 837 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 838 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 839 for (i=0, j=0; i<n_B; i++) { 840 if (!PetscBTLookup(bitmask,is_indices[i])) { 841 aux_array2[j++] = i; 842 } 843 } 844 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 848 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 849 850 if (pcbddc->switch_static || pcbddc->dbg_flag) { 851 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 852 for (i=0, j=0; i<n_R; i++) { 853 if (PetscBTLookup(bitmask,idx_R_local[i])) { 854 aux_array1[j++] = i; 855 } 856 } 857 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 858 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 859 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 860 } 861 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 868 { 869 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 870 871 PetscFunctionBegin; 872 pcbddc->use_exact_dirichlet=use; 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 879 { 880 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 881 PC_IS *pcis = (PC_IS*)pc->data; 882 PC pc_temp; 883 Mat A_RR; 884 Vec vec1,vec2,vec3; 885 MatStructure matstruct; 886 PetscScalar m_one = -1.0; 887 PetscReal value; 888 PetscInt n_D,n_R,use_exact,use_exact_reduced; 889 PetscErrorCode ierr; 890 891 PetscFunctionBegin; 892 /* Creating PC contexts for local Dirichlet and Neumann problems */ 893 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 894 895 /* DIRICHLET PROBLEM */ 896 /* Matrix for Dirichlet problem is pcis->A_II */ 897 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 898 if (!pcbddc->ksp_D) { /* create object if not yet build */ 899 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 900 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 901 /* default */ 902 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 903 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 904 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 905 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 906 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 907 } 908 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 909 /* Allow user's customization */ 910 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 911 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 912 if (!n_D) { 913 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 914 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 915 } 916 /* Set Up KSP for Dirichlet problem of BDDC */ 917 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 918 /* set ksp_D into pcis data */ 919 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 921 pcis->ksp_D = pcbddc->ksp_D; 922 923 /* NEUMANN PROBLEM */ 924 /* Matrix for Neumann problem is A_RR -> we need to create it */ 925 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 926 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 927 if (!pcbddc->ksp_R) { /* create object if not yet build */ 928 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 929 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 930 /* default */ 931 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 932 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 933 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 934 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 935 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 936 } 937 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 938 /* Allow user's customization */ 939 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 940 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 941 if (!n_R) { 942 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 943 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 944 } 945 /* Set Up KSP for Neumann problem of BDDC */ 946 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 947 948 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 949 950 /* Dirichlet */ 951 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 952 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 953 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 954 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 955 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 956 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 957 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 958 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 959 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 960 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 961 /* need to be adapted? */ 962 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 963 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 964 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 965 /* print info */ 966 if (pcbddc->dbg_flag) { 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 969 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 972 } 973 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 974 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 975 } 976 977 /* Neumann */ 978 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 979 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 980 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 981 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 982 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 983 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 984 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 985 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 986 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 987 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 988 /* need to be adapted? */ 989 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 990 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 991 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 992 /* print info */ 993 if (pcbddc->dbg_flag) { 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 996 } 997 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 998 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 999 } 1000 1001 /* free Neumann problem's matrix */ 1002 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1008 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1009 { 1010 PetscErrorCode ierr; 1011 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1012 1013 PetscFunctionBegin; 1014 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1015 if (pcbddc->local_auxmat1) { 1016 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1017 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1018 } 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1024 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1025 { 1026 PetscErrorCode ierr; 1027 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1028 PC_IS* pcis = (PC_IS*) (pc->data); 1029 const PetscScalar zero = 0.0; 1030 1031 PetscFunctionBegin; 1032 /* Application of PHI^T (or PSI^T) */ 1033 if (pcbddc->coarse_psi_B) { 1034 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1035 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1036 } else { 1037 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1038 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1039 } 1040 /* Scatter data of coarse_rhs */ 1041 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1042 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 1044 /* Local solution on R nodes */ 1045 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1048 if (pcbddc->switch_static) { 1049 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1050 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 } 1052 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1053 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1054 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 if (pcbddc->switch_static) { 1057 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 } 1060 1061 /* Coarse solution */ 1062 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1064 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1067 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 1069 /* Sum contributions from two levels */ 1070 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1071 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1078 { 1079 PetscErrorCode ierr; 1080 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1081 1082 PetscFunctionBegin; 1083 switch (pcbddc->coarse_communications_type) { 1084 case SCATTERS_BDDC: 1085 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1086 break; 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1093 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1094 { 1095 PetscErrorCode ierr; 1096 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1097 1098 PetscFunctionBegin; 1099 switch (pcbddc->coarse_communications_type) { 1100 case SCATTERS_BDDC: 1101 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1102 break; 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 /* uncomment for testing purposes */ 1108 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1111 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1112 { 1113 PetscErrorCode ierr; 1114 PC_IS* pcis = (PC_IS*)(pc->data); 1115 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1116 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1117 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1118 MatType impMatType=MATSEQAIJ; 1119 /* one and zero */ 1120 PetscScalar one=1.0,zero=0.0; 1121 /* space to store constraints and their local indices */ 1122 PetscScalar *temp_quadrature_constraint; 1123 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1124 /* iterators */ 1125 PetscInt i,j,k,total_counts,temp_start_ptr; 1126 /* stuff to store connected components stored in pcbddc->mat_graph */ 1127 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1128 PetscInt n_ISForFaces,n_ISForEdges; 1129 /* near null space stuff */ 1130 MatNullSpace nearnullsp; 1131 const Vec *nearnullvecs; 1132 Vec *localnearnullsp; 1133 PetscBool nnsp_has_cnst; 1134 PetscInt nnsp_size; 1135 PetscScalar *array; 1136 /* BLAS integers */ 1137 PetscBLASInt lwork,lierr; 1138 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1139 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1140 /* LAPACK working arrays for SVD or POD */ 1141 PetscBool skip_lapack; 1142 PetscScalar *work; 1143 PetscReal *singular_vals; 1144 #if defined(PETSC_USE_COMPLEX) 1145 PetscReal *rwork; 1146 #endif 1147 #if defined(PETSC_MISSING_LAPACK_GESVD) 1148 PetscBLASInt Blas_one_2=1; 1149 PetscScalar *temp_basis,*correlation_mat; 1150 #else 1151 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1152 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1153 #endif 1154 /* change of basis */ 1155 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1156 PetscBool boolforchange; 1157 PetscBT touched,change_basis; 1158 /* auxiliary stuff */ 1159 PetscInt *nnz,*is_indices,*local_to_B; 1160 /* some quantities */ 1161 PetscInt n_vertices,total_primal_vertices; 1162 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1163 1164 1165 PetscFunctionBegin; 1166 /* Get index sets for faces, edges and vertices from graph */ 1167 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1168 pcbddc->use_vertices = PETSC_TRUE; 1169 } 1170 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1171 /* print some info */ 1172 if (pcbddc->dbg_flag) { 1173 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1174 i = 0; 1175 if (ISForVertices) { 1176 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1177 } 1178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 /* check if near null space is attached to global mat */ 1184 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1185 if (nearnullsp) { 1186 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1187 } else { /* if near null space is not provided BDDC uses constants by default */ 1188 nnsp_size = 0; 1189 nnsp_has_cnst = PETSC_TRUE; 1190 } 1191 /* get max number of constraints on a single cc */ 1192 max_constraints = nnsp_size; 1193 if (nnsp_has_cnst) max_constraints++; 1194 1195 /* 1196 Evaluate maximum storage size needed by the procedure 1197 - temp_indices will contain start index of each constraint stored as follows 1198 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1199 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1200 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1201 */ 1202 total_counts = n_ISForFaces+n_ISForEdges; 1203 total_counts *= max_constraints; 1204 n_vertices = 0; 1205 if (ISForVertices) { 1206 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1207 } 1208 total_counts += n_vertices; 1209 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1210 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1211 total_counts = 0; 1212 max_size_of_constraint = 0; 1213 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1214 if (i<n_ISForEdges) { 1215 used_IS = &ISForEdges[i]; 1216 } else { 1217 used_IS = &ISForFaces[i-n_ISForEdges]; 1218 } 1219 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1220 total_counts += j; 1221 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1222 } 1223 total_counts *= max_constraints; 1224 total_counts += n_vertices; 1225 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1226 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1227 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1228 /* local to boundary numbering */ 1229 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1230 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1231 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1232 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1233 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1234 /* get local part of global near null space vectors */ 1235 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1236 for (k=0;k<nnsp_size;k++) { 1237 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1238 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 } 1241 1242 /* whether or not to skip lapack calls */ 1243 skip_lapack = PETSC_TRUE; 1244 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1245 1246 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1247 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1248 PetscScalar temp_work; 1249 #if defined(PETSC_MISSING_LAPACK_GESVD) 1250 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1251 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1252 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1253 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1254 #if defined(PETSC_USE_COMPLEX) 1255 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1256 #endif 1257 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1258 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1259 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1260 lwork = -1; 1261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1262 #if !defined(PETSC_USE_COMPLEX) 1263 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1264 #else 1265 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1266 #endif 1267 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1268 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1269 #else /* on missing GESVD */ 1270 /* SVD */ 1271 PetscInt max_n,min_n; 1272 max_n = max_size_of_constraint; 1273 min_n = max_constraints; 1274 if (max_size_of_constraint < max_constraints) { 1275 min_n = max_size_of_constraint; 1276 max_n = max_constraints; 1277 } 1278 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1279 #if defined(PETSC_USE_COMPLEX) 1280 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1281 #endif 1282 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1283 lwork = -1; 1284 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1285 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1286 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1287 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1288 #if !defined(PETSC_USE_COMPLEX) 1289 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1290 #else 1291 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1292 #endif 1293 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1294 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1295 #endif /* on missing GESVD */ 1296 /* Allocate optimal workspace */ 1297 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1298 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1299 } 1300 /* Now we can loop on constraining sets */ 1301 total_counts = 0; 1302 temp_indices[0] = 0; 1303 /* vertices */ 1304 if (ISForVertices) { 1305 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1306 if (nnsp_has_cnst) { /* consider all vertices */ 1307 for (i=0;i<n_vertices;i++) { 1308 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1309 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1310 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1311 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1312 total_counts++; 1313 } 1314 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1315 PetscBool used_vertex; 1316 for (i=0;i<n_vertices;i++) { 1317 used_vertex = PETSC_FALSE; 1318 k = 0; 1319 while (!used_vertex && k<nnsp_size) { 1320 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1321 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1322 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1323 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1324 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1325 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1326 total_counts++; 1327 used_vertex = PETSC_TRUE; 1328 } 1329 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1330 k++; 1331 } 1332 } 1333 } 1334 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1335 n_vertices = total_counts; 1336 } 1337 1338 /* edges and faces */ 1339 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1340 if (i<n_ISForEdges) { 1341 used_IS = &ISForEdges[i]; 1342 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1343 } else { 1344 used_IS = &ISForFaces[i-n_ISForEdges]; 1345 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1346 } 1347 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1348 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1349 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1350 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1351 /* change of basis should not be performed on local periodic nodes */ 1352 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1353 if (nnsp_has_cnst) { 1354 PetscScalar quad_value; 1355 temp_constraints++; 1356 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1357 for (j=0;j<size_of_constraint;j++) { 1358 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1359 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1360 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1361 } 1362 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1363 if (boolforchange) { 1364 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1365 } 1366 total_counts++; 1367 } 1368 for (k=0;k<nnsp_size;k++) { 1369 PetscReal real_value; 1370 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1371 for (j=0;j<size_of_constraint;j++) { 1372 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1373 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1374 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1375 } 1376 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1377 /* check if array is null on the connected component */ 1378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1379 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1380 if (real_value > 0.0) { /* keep indices and values */ 1381 temp_constraints++; 1382 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1383 if (boolforchange) { 1384 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1385 } 1386 total_counts++; 1387 } 1388 } 1389 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1390 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1391 if (!pcbddc->use_nnsp_true) { 1392 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1393 1394 #if defined(PETSC_MISSING_LAPACK_GESVD) 1395 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1396 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1397 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1398 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1399 from that computed using LAPACKgesvd 1400 -> This is due to a different computation of eigenvectors in LAPACKheev 1401 -> The quality of the POD-computed basis will be the same */ 1402 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1403 /* Store upper triangular part of correlation matrix */ 1404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1405 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1406 for (j=0;j<temp_constraints;j++) { 1407 for (k=0;k<j+1;k++) { 1408 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1409 } 1410 } 1411 /* compute eigenvalues and eigenvectors of correlation matrix */ 1412 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1413 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1414 #if !defined(PETSC_USE_COMPLEX) 1415 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1416 #else 1417 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1418 #endif 1419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1421 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1422 j=0; 1423 while (j < temp_constraints && singular_vals[j] < tol) j++; 1424 total_counts=total_counts-j; 1425 /* scale and copy POD basis into used quadrature memory */ 1426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1427 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1428 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1430 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1431 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1432 if (j<temp_constraints) { 1433 PetscInt ii; 1434 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1436 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1438 for (k=0;k<temp_constraints-j;k++) { 1439 for (ii=0;ii<size_of_constraint;ii++) { 1440 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1441 } 1442 } 1443 } 1444 #else /* on missing GESVD */ 1445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1446 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1448 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1449 #if !defined(PETSC_USE_COMPLEX) 1450 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1451 #else 1452 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1453 #endif 1454 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1456 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1457 k = temp_constraints; 1458 if (k > size_of_constraint) k = size_of_constraint; 1459 j = 0; 1460 while (j < k && singular_vals[k-j-1] < tol) j++; 1461 total_counts = total_counts-temp_constraints+k-j; 1462 #endif /* on missing GESVD */ 1463 } 1464 } 1465 /* free index sets of faces, edges and vertices */ 1466 for (i=0;i<n_ISForFaces;i++) { 1467 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1468 } 1469 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1470 for (i=0;i<n_ISForEdges;i++) { 1471 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1472 } 1473 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1474 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1475 1476 /* free workspace */ 1477 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1478 ierr = PetscFree(work);CHKERRQ(ierr); 1479 #if defined(PETSC_USE_COMPLEX) 1480 ierr = PetscFree(rwork);CHKERRQ(ierr); 1481 #endif 1482 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1483 #if defined(PETSC_MISSING_LAPACK_GESVD) 1484 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1485 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1486 #endif 1487 } 1488 for (k=0;k<nnsp_size;k++) { 1489 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1490 } 1491 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1492 1493 /* set quantities in pcbddc data structure */ 1494 /* n_vertices defines the number of subdomain corners in the primal space */ 1495 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1496 pcbddc->local_primal_size = total_counts; 1497 pcbddc->n_vertices = n_vertices; 1498 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1499 1500 /* Create constraint matrix */ 1501 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1502 /* so we need to set it up properly either with or without change of basis */ 1503 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1504 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1505 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1506 /* array to compute a local numbering of constraints : vertices first then constraints */ 1507 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1508 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1509 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1510 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1511 /* auxiliary stuff for basis change */ 1512 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1513 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1514 1515 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1516 total_primal_vertices=0; 1517 for (i=0;i<pcbddc->local_primal_size;i++) { 1518 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1519 if (size_of_constraint == 1) { 1520 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1521 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1522 aux_primal_minloc[total_primal_vertices]=0; 1523 total_primal_vertices++; 1524 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1525 PetscInt min_loc,min_index; 1526 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1527 /* find first untouched local node */ 1528 k = 0; 1529 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1530 min_index = global_indices[k]; 1531 min_loc = k; 1532 /* search the minimum among global nodes already untouched on the cc */ 1533 for (k=1;k<size_of_constraint;k++) { 1534 /* there can be more than one constraint on a single connected component */ 1535 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1536 min_index = global_indices[k]; 1537 min_loc = k; 1538 } 1539 } 1540 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1541 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1542 aux_primal_minloc[total_primal_vertices]=min_loc; 1543 total_primal_vertices++; 1544 } 1545 } 1546 /* free workspace */ 1547 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1548 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1549 /* permute indices in order to have a sorted set of vertices */ 1550 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1551 1552 /* nonzero structure of constraint matrix */ 1553 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1554 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1555 j=total_primal_vertices; 1556 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1557 if (!PetscBTLookup(change_basis,i)) { 1558 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1559 j++; 1560 } 1561 } 1562 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1563 ierr = PetscFree(nnz);CHKERRQ(ierr); 1564 /* set values in constraint matrix */ 1565 for (i=0;i<total_primal_vertices;i++) { 1566 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1567 } 1568 total_counts = total_primal_vertices; 1569 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1570 if (!PetscBTLookup(change_basis,i)) { 1571 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1572 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1573 total_counts++; 1574 } 1575 } 1576 /* assembling */ 1577 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1578 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1579 /* 1580 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1581 */ 1582 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1583 if (pcbddc->use_change_of_basis) { 1584 PetscBool qr_needed = PETSC_FALSE; 1585 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1586 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1587 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1588 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1589 /* work arrays */ 1590 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1591 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1592 /* nonzeros per row */ 1593 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1594 if (PetscBTLookup(change_basis,i)) { 1595 qr_needed = PETSC_TRUE; 1596 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1597 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1598 } 1599 } 1600 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1601 ierr = PetscFree(nnz);CHKERRQ(ierr); 1602 /* Set initial identity in the matrix */ 1603 for (i=0;i<pcis->n_B;i++) { 1604 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 1607 /* Now we loop on the constraints which need a change of basis */ 1608 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1609 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1610 /* Change of basis matrix T computed via QR decomposition of constraints */ 1611 if (qr_needed) { 1612 /* dual and primal dofs on a single cc */ 1613 PetscInt dual_dofs,primal_dofs; 1614 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1615 PetscInt primal_counter; 1616 /* working stuff for GEQRF */ 1617 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1618 PetscBLASInt lqr_work; 1619 /* working stuff for UNGQR */ 1620 PetscScalar *gqr_work,lgqr_work_t; 1621 PetscBLASInt lgqr_work; 1622 /* working stuff for TRTRS */ 1623 PetscScalar *trs_rhs; 1624 PetscBLASInt Blas_NRHS; 1625 /* pointers for values insertion into change of basis matrix */ 1626 PetscInt *start_rows,*start_cols; 1627 PetscScalar *start_vals; 1628 /* working stuff for values insertion */ 1629 PetscBT is_primal; 1630 1631 /* space to store Q */ 1632 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1633 /* first we issue queries for optimal work */ 1634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1635 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1637 lqr_work = -1; 1638 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1639 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1640 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1641 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1642 lgqr_work = -1; 1643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1645 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1646 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1647 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1648 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1649 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1650 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1651 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1652 /* array to store scaling factors for reflectors */ 1653 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1654 /* array to store rhs and solution of triangular solver */ 1655 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1656 /* array to store whether a node is primal or not */ 1657 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1658 for (i=0;i<total_primal_vertices;i++) { 1659 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1660 } 1661 1662 /* allocating workspace for check */ 1663 if (pcbddc->dbg_flag) { 1664 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1666 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1667 } 1668 1669 /* loop on constraints and see whether or not they need a change of basis */ 1670 /* -> using implicit ordering contained in temp_indices data */ 1671 total_counts = pcbddc->n_vertices; 1672 primal_counter = total_counts; 1673 while (total_counts<pcbddc->local_primal_size) { 1674 primal_dofs = 1; 1675 if (PetscBTLookup(change_basis,total_counts)) { 1676 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1677 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1678 primal_dofs++; 1679 } 1680 /* get constraint info */ 1681 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1682 dual_dofs = size_of_constraint-primal_dofs; 1683 1684 /* copy quadrature constraints for change of basis check */ 1685 if (pcbddc->dbg_flag) { 1686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1687 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1688 } 1689 1690 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1691 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1692 1693 /* compute QR decomposition of constraints */ 1694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1696 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1697 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1698 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1699 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1700 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1701 1702 /* explictly compute R^-T */ 1703 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1704 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1705 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1706 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1709 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1710 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1712 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1713 1714 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1717 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1720 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1721 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1722 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1723 1724 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1725 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1726 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1727 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1728 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1731 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1734 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1736 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1737 1738 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1739 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1740 /* insert cols for primal dofs */ 1741 for (j=0;j<primal_dofs;j++) { 1742 start_vals = &qr_basis[j*size_of_constraint]; 1743 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1744 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1745 } 1746 /* insert cols for dual dofs */ 1747 for (j=0,k=0;j<dual_dofs;k++) { 1748 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1749 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1750 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1751 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1752 j++; 1753 } 1754 } 1755 1756 /* check change of basis */ 1757 if (pcbddc->dbg_flag) { 1758 PetscInt ii,jj; 1759 PetscBool valid_qr=PETSC_TRUE; 1760 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1765 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1766 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1767 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1769 for (jj=0;jj<size_of_constraint;jj++) { 1770 for (ii=0;ii<primal_dofs;ii++) { 1771 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1772 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1773 } 1774 } 1775 if (!valid_qr) { 1776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1777 for (jj=0;jj<size_of_constraint;jj++) { 1778 for (ii=0;ii<primal_dofs;ii++) { 1779 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1780 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1781 } 1782 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1783 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1784 } 1785 } 1786 } 1787 } else { 1788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1789 } 1790 } 1791 /* increment primal counter */ 1792 primal_counter += primal_dofs; 1793 } else { 1794 if (pcbddc->dbg_flag) { 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1796 } 1797 } 1798 /* increment constraint counter total_counts */ 1799 total_counts += primal_dofs; 1800 } 1801 if (pcbddc->dbg_flag) { 1802 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1803 ierr = PetscFree(work);CHKERRQ(ierr); 1804 } 1805 /* free workspace */ 1806 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1807 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1808 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1809 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1810 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1811 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1812 } 1813 /* assembling */ 1814 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1815 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1816 /* 1817 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1818 */ 1819 } 1820 /* free workspace */ 1821 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1822 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1823 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1824 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1825 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1826 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1827 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1828 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1834 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1835 { 1836 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1837 PC_IS *pcis = (PC_IS*)pc->data; 1838 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1839 PetscInt bs,ierr,i,vertex_size; 1840 PetscViewer viewer=pcbddc->dbg_viewer; 1841 1842 PetscFunctionBegin; 1843 /* Init local Graph struct */ 1844 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1845 1846 /* Check validity of the csr graph passed in by the user */ 1847 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1848 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1849 } 1850 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1851 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1852 Mat mat_adj; 1853 const PetscInt *xadj,*adjncy; 1854 PetscBool flg_row=PETSC_TRUE; 1855 1856 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1857 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1858 if (!flg_row) { 1859 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1860 } 1861 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1862 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1863 if (!flg_row) { 1864 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1865 } 1866 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1867 } 1868 1869 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1870 vertex_size = 1; 1871 if (!pcbddc->n_ISForDofs) { 1872 IS *custom_ISForDofs; 1873 1874 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1875 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1876 for (i=0;i<bs;i++) { 1877 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1878 } 1879 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1880 /* remove my references to IS objects */ 1881 for (i=0;i<bs;i++) { 1882 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1883 } 1884 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1885 } else { /* mat block size as vertex size (used for elasticity) */ 1886 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1887 } 1888 1889 /* Setup of Graph */ 1890 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1891 1892 /* Graph's connected components analysis */ 1893 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1894 1895 /* print some info to stdout */ 1896 if (pcbddc->dbg_flag) { 1897 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1904 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1905 { 1906 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1907 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 n = 0; 1912 vertices = 0; 1913 if (pcbddc->ConstraintMatrix) { 1914 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1915 for (i=0;i<local_primal_size;i++) { 1916 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1917 if (size_of_constraint == 1) n++; 1918 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1919 } 1920 if (vertices_idx) { 1921 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1922 n = 0; 1923 for (i=0;i<local_primal_size;i++) { 1924 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1925 if (size_of_constraint == 1) { 1926 vertices[n++]=row_cmat_indices[0]; 1927 } 1928 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1929 } 1930 } 1931 } 1932 *n_vertices = n; 1933 if (vertices_idx) *vertices_idx = vertices; 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1939 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1940 { 1941 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1942 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1943 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1944 PetscBT touched; 1945 PetscErrorCode ierr; 1946 1947 /* This function assumes that the number of local constraints per connected component 1948 is not greater than the number of nodes defined for the connected component 1949 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1950 PetscFunctionBegin; 1951 n = 0; 1952 constraints_index = 0; 1953 if (pcbddc->ConstraintMatrix) { 1954 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1955 max_size_of_constraint = 0; 1956 for (i=0;i<local_primal_size;i++) { 1957 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1958 if (size_of_constraint > 1) { 1959 n++; 1960 } 1961 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1962 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1963 } 1964 if (constraints_idx) { 1965 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1966 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1967 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1968 n = 0; 1969 for (i=0;i<local_primal_size;i++) { 1970 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1971 if (size_of_constraint > 1) { 1972 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1973 /* find first untouched local node */ 1974 j = 0; 1975 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1976 min_index = row_cmat_global_indices[j]; 1977 min_loc = j; 1978 /* search the minimum among nodes not yet touched on the connected component 1979 since there can be more than one constraint on a single cc */ 1980 for (j=1;j<size_of_constraint;j++) { 1981 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1982 min_index = row_cmat_global_indices[j]; 1983 min_loc = j; 1984 } 1985 } 1986 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1987 constraints_index[n++] = row_cmat_indices[min_loc]; 1988 } 1989 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1990 } 1991 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1992 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1993 } 1994 } 1995 *n_constraints = n; 1996 if (constraints_idx) *constraints_idx = constraints_index; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 /* the next two functions has been adapted from pcis.c */ 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCApplySchur" 2003 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2004 { 2005 PetscErrorCode ierr; 2006 PC_IS *pcis = (PC_IS*)(pc->data); 2007 2008 PetscFunctionBegin; 2009 if (!vec2_B) { vec2_B = v; } 2010 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2011 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2012 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2013 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2014 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2020 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2021 { 2022 PetscErrorCode ierr; 2023 PC_IS *pcis = (PC_IS*)(pc->data); 2024 2025 PetscFunctionBegin; 2026 if (!vec2_B) { vec2_B = v; } 2027 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2028 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2029 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2030 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2031 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "PCBDDCSubsetNumbering" 2037 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2038 { 2039 Vec local_vec,global_vec; 2040 IS seqis,paris; 2041 VecScatter scatter_ctx; 2042 PetscScalar *array; 2043 PetscInt *temp_global_dofs; 2044 PetscScalar globalsum; 2045 PetscInt i,j,s; 2046 PetscInt nlocals,first_index,old_index,max_local; 2047 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2048 PetscMPIInt *dof_sizes,*dof_displs; 2049 PetscBool first_found; 2050 PetscErrorCode ierr; 2051 2052 PetscFunctionBegin; 2053 /* mpi buffers */ 2054 MPI_Comm_size(comm,&size_prec_comm); 2055 MPI_Comm_rank(comm,&rank_prec_comm); 2056 j = ( !rank_prec_comm ? size_prec_comm : 0); 2057 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2058 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2059 /* get maximum size of subset */ 2060 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2061 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2062 max_local = 0; 2063 if (n_local_dofs) { 2064 max_local = temp_global_dofs[0]; 2065 for (i=1;i<n_local_dofs;i++) { 2066 if (max_local < temp_global_dofs[i] ) { 2067 max_local = temp_global_dofs[i]; 2068 } 2069 } 2070 } 2071 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2072 max_global++; 2073 max_local = 0; 2074 if (n_local_dofs) { 2075 max_local = local_dofs[0]; 2076 for (i=1;i<n_local_dofs;i++) { 2077 if (max_local < local_dofs[i] ) { 2078 max_local = local_dofs[i]; 2079 } 2080 } 2081 } 2082 max_local++; 2083 /* allocate workspace */ 2084 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2085 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2086 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2087 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2088 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2089 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2090 /* create scatter */ 2091 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2092 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2093 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2094 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2095 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2096 /* init array */ 2097 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2098 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2099 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2100 if (local_dofs_mult) { 2101 for (i=0;i<n_local_dofs;i++) { 2102 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2103 } 2104 } else { 2105 for (i=0;i<n_local_dofs;i++) { 2106 array[local_dofs[i]]=1.0; 2107 } 2108 } 2109 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2110 /* scatter into global vec and get total number of global dofs */ 2111 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2114 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2115 /* Fill global_vec with cumulative function for global numbering */ 2116 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2117 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2118 nlocals = 0; 2119 first_index = -1; 2120 first_found = PETSC_FALSE; 2121 for (i=0;i<s;i++) { 2122 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2123 first_found = PETSC_TRUE; 2124 first_index = i; 2125 } 2126 nlocals += (PetscInt)PetscRealPart(array[i]); 2127 } 2128 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2129 if (!rank_prec_comm) { 2130 dof_displs[0]=0; 2131 for (i=1;i<size_prec_comm;i++) { 2132 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2133 } 2134 } 2135 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2136 if (first_found) { 2137 array[first_index] += (PetscScalar)nlocals; 2138 old_index = first_index; 2139 for (i=first_index+1;i<s;i++) { 2140 if (PetscRealPart(array[i]) > 0.0) { 2141 array[i] += array[old_index]; 2142 old_index = i; 2143 } 2144 } 2145 } 2146 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2147 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2148 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2149 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2150 /* get global ordering of local dofs */ 2151 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2152 if (local_dofs_mult) { 2153 for (i=0;i<n_local_dofs;i++) { 2154 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2155 } 2156 } else { 2157 for (i=0;i<n_local_dofs;i++) { 2158 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2159 } 2160 } 2161 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2162 /* free workspace */ 2163 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2164 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2165 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2166 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2167 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2168 /* return pointer to global ordering of local dofs */ 2169 *global_numbering_subset = temp_global_dofs; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2175 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2176 { 2177 PetscInt i,j; 2178 PetscScalar *alphas; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 /* this implements stabilized Gram-Schmidt */ 2183 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2184 for (i=0;i<n;i++) { 2185 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2186 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2187 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2188 } 2189 ierr = PetscFree(alphas);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatConvert_IS_AIJ" 2196 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2197 { 2198 Mat new_mat; 2199 Mat_IS *matis = (Mat_IS*)(mat->data); 2200 /* info on mat */ 2201 PetscInt bs,rows,cols; 2202 PetscInt lrows,lcols; 2203 PetscInt local_rows,local_cols; 2204 PetscMPIInt nsubdomains,rank; 2205 /* preallocation */ 2206 Vec vec_dnz,vec_onz; 2207 PetscScalar *my_dnz,*my_onz,*array; 2208 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2209 PetscInt index_row,index_col,owner; 2210 PetscInt *local_indices,*global_indices; 2211 /* work */ 2212 PetscInt i,j; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 /* CHECKS */ 2217 /* get info from mat */ 2218 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2219 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2220 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2221 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2222 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2223 2224 /* MAT_INITIAL_MATRIX */ 2225 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2226 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2227 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2228 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2229 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2230 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2231 2232 /* 2233 preallocation 2234 */ 2235 2236 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2237 /* 2238 Some vectors are needed to sum up properly on shared interface dofs. 2239 Note that preallocation is not exact, since it overestimates nonzeros 2240 */ 2241 /* 2242 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2243 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2244 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2245 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2246 */ 2247 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2248 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2249 /* All processes should compute entire row ownership */ 2250 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2251 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2252 for (i=0;i<nsubdomains;i++) { 2253 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2254 row_ownership[j]=i; 2255 } 2256 } 2257 /* map indices of local mat to global values */ 2258 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2259 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2260 for (i=0;i<local_rows;i++) local_indices[i]=i; 2261 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2262 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2263 2264 /* my_dnz and my_onz contains exact contribution to preallocation from each local mat */ 2265 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2266 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2267 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2268 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2269 for (i=0;i<local_rows;i++) { 2270 index_row = global_indices[i]; 2271 for (j=i;j<local_rows;j++) { 2272 owner = row_ownership[index_row]; 2273 index_col = global_indices[j]; 2274 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2275 my_dnz[i] += 1.0; 2276 } else { /* offdiag block */ 2277 my_onz[i] += 1.0; 2278 } 2279 /* same as before, interchanging rows and cols */ 2280 if (i != j) { 2281 owner = row_ownership[index_col]; 2282 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2283 my_dnz[j] += 1.0; 2284 } else { 2285 my_onz[j] += 1.0; 2286 } 2287 } 2288 } 2289 } 2290 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2291 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2292 if (local_rows) { 2293 /* this way, preallocation is always sufficient */ 2294 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2295 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2296 } 2297 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2298 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2299 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2300 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2301 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2302 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2303 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2304 2305 /* set computed preallocation in dnz and onz */ 2306 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2307 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2308 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2309 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2310 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2311 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2312 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2313 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2314 2315 /* Resize preallocation if too much overstimated */ 2316 for (i=0;i<lrows;i++) { 2317 dnz[i] = PetscMin(dnz[i],lcols); 2318 onz[i] = PetscMin(onz[i],cols-lcols); 2319 } 2320 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2321 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2322 2323 /* 2324 SET VALUES. Very Basic. 2325 */ 2326 for (i=0;i<local_rows;i++) { 2327 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2328 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2329 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2330 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2331 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2332 } 2333 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2334 2335 /* get back mat */ 2336 *M = new_mat; 2337 PetscFunctionReturn(0); 2338 } 2339 2340 2341 2342 /* BDDC requires metis 5.0.1 for multilevel */ 2343 #if defined(PETSC_HAVE_METIS) 2344 #include "metis.h" 2345 #define MetisInt idx_t 2346 #define MetisScalar real_t 2347 #endif 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2351 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2352 { 2353 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2354 PC_IS *pcis = (PC_IS*)pc->data; 2355 MPI_Comm prec_comm; 2356 MPI_Comm coarse_comm; 2357 Mat coarse_mat; 2358 2359 PetscInt coarse_size; 2360 2361 MatNullSpace CoarseNullSpace; 2362 2363 /* common to all choiches */ 2364 PetscScalar *temp_coarse_mat_vals; 2365 PetscScalar *ins_coarse_mat_vals; 2366 PetscInt *ins_local_primal_indices; 2367 PetscMPIInt *localsizes2,*localdispl2; 2368 PetscMPIInt size_prec_comm; 2369 PetscMPIInt rank_prec_comm; 2370 PetscMPIInt active_rank=MPI_PROC_NULL; 2371 PetscMPIInt master_proc=0; 2372 PetscInt ins_local_primal_size; 2373 /* specific to MULTILEVEL_BDDC */ 2374 PetscMPIInt *ranks_recv=0; 2375 PetscMPIInt count_recv=0; 2376 PetscMPIInt rank_coarse_proc_send_to=-1; 2377 PetscMPIInt coarse_color = MPI_UNDEFINED; 2378 ISLocalToGlobalMapping coarse_ISLG; 2379 /* some other variables */ 2380 PetscErrorCode ierr; 2381 MatType coarse_mat_type; 2382 PCType coarse_pc_type; 2383 KSPType coarse_ksp_type; 2384 PC pc_temp; 2385 PetscInt i,j,k; 2386 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2387 /* verbose output viewer */ 2388 PetscViewer viewer=pcbddc->dbg_viewer; 2389 PetscInt dbg_flag=pcbddc->dbg_flag; 2390 2391 PetscInt offset,offset2; 2392 PetscMPIInt im_active,active_procs; 2393 PetscInt *dnz; 2394 2395 PetscBool setsym,issym=PETSC_FALSE; 2396 2397 PetscInt *replicated_local_primal_indices=0,*local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0; 2398 PetscInt replicated_primal_size=0; 2399 2400 PetscFunctionBegin; 2401 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2402 ins_local_primal_indices = 0; 2403 ins_coarse_mat_vals = 0; 2404 localsizes2 = 0; 2405 localdispl2 = 0; 2406 temp_coarse_mat_vals = 0; 2407 coarse_ISLG = 0; 2408 2409 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2410 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2411 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2412 2413 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 2414 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2415 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2416 } 2417 2418 /* Assign global numbering to coarse dofs */ 2419 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2420 2421 im_active = 0; 2422 if (pcis->n) im_active = 1; 2423 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 2424 2425 /* adapt coarse problem type */ 2426 #if defined(PETSC_HAVE_METIS) 2427 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2428 if (pcbddc->current_level < pcbddc->max_levels) { 2429 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2430 if (dbg_flag) { 2431 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2432 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2433 } 2434 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2435 } 2436 } else { 2437 if (dbg_flag) { 2438 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2439 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2440 } 2441 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2442 } 2443 } 2444 #else 2445 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2446 #endif 2447 2448 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2449 Mat coarse_mat_is,coarse_submat_dense; 2450 ISLocalToGlobalMapping coarse_islg; 2451 IS coarse_is; 2452 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2453 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2454 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2455 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2456 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2457 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2458 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2459 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2460 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2461 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2462 /* just for now */ 2463 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2464 coarse_mat_type = MATAIJ; 2465 coarse_pc_type = PCREDUNDANT; 2466 coarse_ksp_type = KSPPREONLY; 2467 coarse_comm = prec_comm; 2468 active_rank = rank_prec_comm; 2469 } else { 2470 /* OLD version from here */ 2471 2472 /* Construct needed data structures for message passing */ 2473 j = 0; 2474 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2475 j = size_prec_comm; 2476 } 2477 ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr); 2478 ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr); 2479 /* Gather local_primal_size information for all processes */ 2480 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2481 ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2482 } else { 2483 ierr = MPI_Gather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2484 } 2485 replicated_primal_size = 0; 2486 for (i=0; i<j; i++) { 2487 local_primal_displacements[i] = replicated_primal_size ; 2488 replicated_primal_size += local_primal_sizes[i]; 2489 } 2490 2491 switch(pcbddc->coarse_problem_type){ 2492 2493 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2494 { 2495 #if defined(PETSC_HAVE_METIS) 2496 /* we need additional variables */ 2497 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2498 MetisInt *metis_coarse_subdivision; 2499 MetisInt options[METIS_NOPTIONS]; 2500 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2501 PetscMPIInt procs_jumps_coarse_comm; 2502 PetscMPIInt *coarse_subdivision; 2503 PetscMPIInt *total_count_recv; 2504 PetscMPIInt *total_ranks_recv; 2505 PetscMPIInt *displacements_recv; 2506 PetscMPIInt *my_faces_connectivity; 2507 PetscMPIInt *petsc_faces_adjncy; 2508 MetisInt *faces_adjncy; 2509 MetisInt *faces_xadj; 2510 PetscMPIInt *number_of_faces; 2511 PetscMPIInt *faces_displacements; 2512 PetscInt *array_int; 2513 PetscMPIInt my_faces=0; 2514 PetscMPIInt total_faces=0; 2515 PetscInt ranks_stretching_ratio; 2516 2517 /* define some quantities */ 2518 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2519 coarse_mat_type = MATIS; 2520 coarse_pc_type = PCBDDC; 2521 coarse_ksp_type = KSPRICHARDSON; 2522 2523 /* details of coarse decomposition */ 2524 n_subdomains = active_procs; 2525 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2526 ranks_stretching_ratio = size_prec_comm/active_procs; 2527 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2528 2529 #if 0 2530 PetscMPIInt *old_ranks; 2531 PetscInt *new_ranks,*jj,*ii; 2532 MatPartitioning mat_part; 2533 IS coarse_new_decomposition,is_numbering; 2534 PetscViewer viewer_test; 2535 MPI_Comm test_coarse_comm; 2536 PetscMPIInt test_coarse_color; 2537 Mat mat_adj; 2538 /* Create new communicator for coarse problem splitting the old one */ 2539 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2540 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2541 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 2542 test_coarse_comm = MPI_COMM_NULL; 2543 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 2544 if (im_active) { 2545 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 2546 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 2547 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2548 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 2549 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 2550 for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1; 2551 for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i; 2552 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 2553 k = pcis->n_neigh-1; 2554 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 2555 ii[0]=0; 2556 ii[1]=k; 2557 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 2558 for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]]; 2559 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 2560 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 2561 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 2562 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 2563 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 2564 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 2565 printf("Setting Nparts %d\n",n_parts); 2566 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 2567 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 2568 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 2569 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 2570 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 2571 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 2572 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 2573 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 2574 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 2575 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 2576 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 2577 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 2578 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 2579 } 2580 #endif 2581 2582 /* build CSR graph of subdomains' connectivity */ 2583 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2584 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2585 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2586 for (j=0;j<pcis->n_shared[i];j++){ 2587 array_int[ pcis->shared[i][j] ]+=1; 2588 } 2589 } 2590 for (i=1;i<pcis->n_neigh;i++){ 2591 for (j=0;j<pcis->n_shared[i];j++){ 2592 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2593 my_faces++; 2594 break; 2595 } 2596 } 2597 } 2598 2599 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2600 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2601 my_faces=0; 2602 for (i=1;i<pcis->n_neigh;i++){ 2603 for (j=0;j<pcis->n_shared[i];j++){ 2604 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2605 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2606 my_faces++; 2607 break; 2608 } 2609 } 2610 } 2611 if (rank_prec_comm == master_proc) { 2612 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2613 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2614 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2615 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2616 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2617 } 2618 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2619 if (rank_prec_comm == master_proc) { 2620 faces_xadj[0]=0; 2621 faces_displacements[0]=0; 2622 j=0; 2623 for (i=1;i<size_prec_comm+1;i++) { 2624 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2625 if (number_of_faces[i-1]) { 2626 j++; 2627 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2628 } 2629 } 2630 } 2631 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2632 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2633 ierr = PetscFree(array_int);CHKERRQ(ierr); 2634 if (rank_prec_comm == master_proc) { 2635 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2636 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2637 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2638 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2639 } 2640 2641 if ( rank_prec_comm == master_proc ) { 2642 2643 PetscInt heuristic_for_metis=3; 2644 2645 ncon=1; 2646 faces_nvtxs=n_subdomains; 2647 /* partition graoh induced by face connectivity */ 2648 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2649 ierr = METIS_SetDefaultOptions(options); 2650 /* we need a contiguous partition of the coarse mesh */ 2651 options[METIS_OPTION_CONTIG]=1; 2652 options[METIS_OPTION_NITER]=30; 2653 if (pcbddc->coarsening_ratio > 1) { 2654 if (n_subdomains>n_parts*heuristic_for_metis) { 2655 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2656 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2657 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2658 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2659 } else { 2660 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2661 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2662 } 2663 } else { 2664 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2665 } 2666 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2667 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2668 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2669 2670 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2671 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2672 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2673 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2674 } 2675 2676 /* Create new communicator for coarse problem splitting the old one */ 2677 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2678 coarse_color=0; /* for communicator splitting */ 2679 active_rank=rank_prec_comm; /* for insertion of matrix values */ 2680 } 2681 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2682 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2683 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2684 2685 if ( coarse_color == 0 ) { 2686 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2687 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2688 } else { 2689 rank_coarse_comm = MPI_PROC_NULL; 2690 } 2691 2692 /* master proc take care of arranging and distributing coarse information */ 2693 if (rank_coarse_comm == master_proc) { 2694 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2695 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2696 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2697 /* some initializations */ 2698 displacements_recv[0]=0; 2699 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2700 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2701 for (j=0;j<size_coarse_comm;j++) { 2702 for (i=0;i<size_prec_comm;i++) { 2703 if (coarse_subdivision[i]==j) total_count_recv[j]++; 2704 } 2705 } 2706 /* displacements needed for scatterv of total_ranks_recv */ 2707 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2708 2709 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2710 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2711 for (j=0;j<size_coarse_comm;j++) { 2712 for (i=0;i<size_prec_comm;i++) { 2713 if (coarse_subdivision[i]==j) { 2714 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2715 total_count_recv[j]+=1; 2716 } 2717 } 2718 } 2719 /*for (j=0;j<size_coarse_comm;j++) { 2720 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2721 for (i=0;i<total_count_recv[j];i++) { 2722 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2723 } 2724 printf("\n"); 2725 }*/ 2726 2727 /* identify new decomposition in terms of ranks in the old communicator */ 2728 for (i=0;i<n_subdomains;i++) { 2729 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2730 } 2731 /*printf("coarse_subdivision in old end new ranks\n"); 2732 for (i=0;i<size_prec_comm;i++) 2733 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 2734 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2735 } else { 2736 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2737 } 2738 printf("\n");*/ 2739 } 2740 2741 /* Scatter new decomposition for send details */ 2742 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2743 /* Scatter receiving details to members of coarse decomposition */ 2744 if ( coarse_color == 0) { 2745 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2746 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2747 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2748 } 2749 2750 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2751 if (coarse_color == 0) { 2752 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2753 for (i=0;i<count_recv;i++) 2754 printf("%d ",ranks_recv[i]); 2755 printf("\n"); 2756 }*/ 2757 2758 if (rank_prec_comm == master_proc) { 2759 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2760 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2761 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2762 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2763 } 2764 #endif 2765 break; 2766 } 2767 2768 case(PARALLEL_BDDC): 2769 2770 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2771 coarse_mat_type = MATAIJ; 2772 coarse_pc_type = PCREDUNDANT; 2773 coarse_ksp_type = KSPPREONLY; 2774 coarse_comm = prec_comm; 2775 active_rank = rank_prec_comm; 2776 break; 2777 2778 } 2779 2780 switch(pcbddc->coarse_communications_type){ 2781 2782 case(SCATTERS_BDDC): 2783 { 2784 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2785 2786 IS coarse_IS; 2787 2788 if(pcbddc->coarsening_ratio == 1) { 2789 ins_local_primal_size = pcbddc->local_primal_size; 2790 ins_local_primal_indices = local_primal_indices; 2791 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2792 /* nonzeros */ 2793 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2794 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2795 for (i=0;i<ins_local_primal_size;i++) { 2796 dnz[i] = ins_local_primal_size; 2797 } 2798 } else { 2799 PetscMPIInt send_size; 2800 PetscMPIInt *send_buffer; 2801 PetscInt *aux_ins_indices; 2802 PetscInt ii,jj; 2803 MPI_Request *requests; 2804 2805 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2806 /* reusing local_primal_displacements and replicated_primal_size */ 2807 ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr); 2808 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr); 2809 replicated_primal_size = count_recv; 2810 j = 0; 2811 for (i=0;i<count_recv;i++) { 2812 local_primal_displacements[i] = j; 2813 j += local_primal_sizes[ranks_recv[i]]; 2814 } 2815 local_primal_displacements[count_recv] = j; 2816 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr); 2817 /* allocate auxiliary space */ 2818 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2819 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2820 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2821 /* allocate stuffs for message massing */ 2822 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2823 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 2824 /* send indices to be inserted */ 2825 for (i=0;i<count_recv;i++) { 2826 send_size = local_primal_sizes[ranks_recv[i]]; 2827 ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 2828 } 2829 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2830 send_size = pcbddc->local_primal_size; 2831 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2832 for (i=0;i<send_size;i++) { 2833 send_buffer[i]=(PetscMPIInt)local_primal_indices[i]; 2834 } 2835 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2836 } 2837 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2838 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2839 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2840 } 2841 j = 0; 2842 for (i=0;i<count_recv;i++) { 2843 ii = local_primal_displacements[i+1]-local_primal_displacements[i]; 2844 localsizes2[i] = ii*ii; 2845 localdispl2[i] = j; 2846 j += localsizes2[i]; 2847 jj = local_primal_displacements[i]; 2848 /* it counts the coarse subdomains sharing the coarse node */ 2849 for (k=0;k<ii;k++) { 2850 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1; 2851 } 2852 } 2853 /* temp_coarse_mat_vals used to store matrix values to be received */ 2854 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2855 /* evaluate how many values I will insert in coarse mat */ 2856 ins_local_primal_size = 0; 2857 for (i=0;i<coarse_size;i++) { 2858 if (aux_ins_indices[i]) { 2859 ins_local_primal_size++; 2860 } 2861 } 2862 /* evaluate indices I will insert in coarse mat */ 2863 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2864 j = 0; 2865 for(i=0;i<coarse_size;i++) { 2866 if(aux_ins_indices[i]) { 2867 ins_local_primal_indices[j] = i; 2868 j++; 2869 } 2870 } 2871 /* processes partecipating in coarse problem receive matrix data from their friends */ 2872 for (i=0;i<count_recv;i++) { 2873 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2874 } 2875 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2876 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 2877 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2878 } 2879 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2880 /* nonzeros */ 2881 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 2882 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2883 /* use aux_ins_indices to realize a global to local mapping */ 2884 j=0; 2885 for(i=0;i<coarse_size;i++){ 2886 if(aux_ins_indices[i]==0){ 2887 aux_ins_indices[i]=-1; 2888 } else { 2889 aux_ins_indices[i]=j; 2890 j++; 2891 } 2892 } 2893 for (i=0;i<count_recv;i++) { 2894 j = local_primal_sizes[ranks_recv[i]]; 2895 for (k=0;k<j;k++) { 2896 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j; 2897 } 2898 } 2899 /* check */ 2900 for (i=0;i<ins_local_primal_size;i++) { 2901 if (dnz[i] > ins_local_primal_size) { 2902 dnz[i] = ins_local_primal_size; 2903 } 2904 } 2905 ierr = PetscFree(requests);CHKERRQ(ierr); 2906 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2907 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2908 } 2909 /* create local to global mapping needed by coarse MATIS */ 2910 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 2911 coarse_comm = prec_comm; 2912 active_rank = rank_prec_comm; 2913 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2914 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2915 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2916 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2917 /* arrays for values insertion */ 2918 ins_local_primal_size = pcbddc->local_primal_size; 2919 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2920 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2921 for (j=0;j<ins_local_primal_size;j++){ 2922 ins_local_primal_indices[j]=local_primal_indices[j]; 2923 for (i=0;i<ins_local_primal_size;i++) { 2924 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2925 } 2926 } 2927 } 2928 break; 2929 2930 } 2931 2932 } 2933 } 2934 /* Now create and fill up coarse matrix */ 2935 if ( rank_prec_comm == active_rank ) { 2936 2937 Mat matis_coarse_local_mat; 2938 2939 2940 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2941 #if 0 2942 ierr = MatCreate(coarse_comm,&coarse_mat);CHKERRQ(ierr); 2943 ierr = MatSetSizes(coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr); 2944 ierr = MatSetType(coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2945 ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr); 2946 ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr); 2947 ierr = MatSetUp(coarse_mat);CHKERRQ(ierr); 2948 ierr = MatSetOption(coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2949 ierr = MatSetOption(coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2950 #endif 2951 } else { 2952 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr); 2953 ierr = MatSetUp(coarse_mat);CHKERRQ(ierr); 2954 ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2955 ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr); 2956 ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr); 2957 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2958 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2959 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2960 } 2961 /* preallocation */ 2962 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2963 2964 #if 0 2965 PetscInt lrows,lcols,bs; 2966 2967 ierr = MatGetLocalSize(coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 2968 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 2969 ierr = MatGetBlockSize(coarse_mat,&bs);CHKERRQ(ierr); 2970 2971 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2972 2973 Vec vec_dnz,vec_onz; 2974 PetscScalar *my_dnz,*my_onz,*array; 2975 PetscInt *mat_ranges,*row_ownership; 2976 PetscInt coarse_index_row,coarse_index_col,owner; 2977 2978 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 2979 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2980 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr); 2981 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 2982 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2983 2984 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 2985 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 2986 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2987 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 2988 2989 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 2990 ierr = MatGetOwnershipRanges(coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2991 for (i=0;i<size_prec_comm;i++) { 2992 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2993 row_ownership[j]=i; 2994 } 2995 } 2996 2997 for (i=0;i<pcbddc->local_primal_size;i++) { 2998 coarse_index_row = local_primal_indices[i]; 2999 owner = row_ownership[coarse_index_row]; 3000 for (j=i;j<pcbddc->local_primal_size;j++) { 3001 owner = row_ownership[coarse_index_row]; 3002 coarse_index_col = local_primal_indices[j]; 3003 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 3004 my_dnz[i] += 1.0; 3005 } else { 3006 my_onz[i] += 1.0; 3007 } 3008 if (i != j) { 3009 owner = row_ownership[coarse_index_col]; 3010 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 3011 my_dnz[j] += 1.0; 3012 } else { 3013 my_onz[j] += 1.0; 3014 } 3015 } 3016 } 3017 } 3018 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 3019 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 3020 if (pcbddc->local_primal_size) { 3021 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 3022 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 3023 } 3024 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 3025 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 3026 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 3027 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 3028 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 3029 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 3030 for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 3031 3032 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 3033 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 3034 for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 3035 3036 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 3037 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 3038 ierr = PetscFree(my_onz);CHKERRQ(ierr); 3039 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 3040 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 3041 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 3042 } 3043 3044 /* check */ 3045 for (i=0;i<lrows;i++) { 3046 if (dnz[i]>lcols) dnz[i]=lcols; 3047 if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols; 3048 } 3049 ierr = MatSeqAIJSetPreallocation(coarse_mat,0,dnz);CHKERRQ(ierr); 3050 ierr = MatMPIAIJSetPreallocation(coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 3051 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3052 #endif 3053 } else { 3054 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3055 ierr = PetscFree(dnz);CHKERRQ(ierr); 3056 } 3057 3058 /* insert values */ 3059 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 3060 #if 0 3061 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3062 #endif 3063 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3064 if (pcbddc->coarsening_ratio == 1) { 3065 ins_coarse_mat_vals = coarse_submat_vals; 3066 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3067 } else { 3068 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3069 for (k=0;k<replicated_primal_size;k++) { 3070 offset = local_primal_displacements[k]; 3071 offset2 = localdispl2[k]; 3072 ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k]; 3073 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3074 for (j=0;j<ins_local_primal_size;j++){ 3075 ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j]; 3076 } 3077 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3078 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3079 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3080 } 3081 } 3082 ins_local_primal_indices = 0; 3083 ins_coarse_mat_vals = 0; 3084 } 3085 ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3086 ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3087 /* symmetry of coarse matrix */ 3088 if (issym) { 3089 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3090 } 3091 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3092 } 3093 3094 /* create loc to glob scatters if needed */ 3095 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 3096 IS local_IS,global_IS; 3097 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 3098 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 3099 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3100 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 3101 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 3102 } 3103 3104 /* free memory no longer needed */ 3105 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3106 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3107 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3108 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3109 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3110 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3111 3112 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3113 ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr); 3114 ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr); 3115 ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr); 3116 3117 /* Compute coarse null space */ 3118 CoarseNullSpace = 0; 3119 if (pcbddc->NullSpace) { 3120 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3121 } 3122 3123 /* KSP for coarse problem */ 3124 if (rank_prec_comm == active_rank) { 3125 PetscBool isbddc=PETSC_FALSE; 3126 3127 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 3128 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3129 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3130 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 3131 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3132 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3133 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3134 /* Allow user's customization */ 3135 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3136 /* Set Up PC for coarse problem BDDC */ 3137 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3138 i = pcbddc->current_level+1; 3139 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 3140 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3141 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3142 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3143 if (CoarseNullSpace) { 3144 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3145 } 3146 if (dbg_flag) { 3147 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 3148 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3149 } 3150 } else { 3151 if (CoarseNullSpace) { 3152 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3153 } 3154 } 3155 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3156 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3157 3158 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 3159 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3160 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3161 if (j == 1) { 3162 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3163 if (isbddc) { 3164 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3165 } 3166 } 3167 } 3168 /* Check coarse problem if requested */ 3169 if ( dbg_flag && rank_prec_comm == active_rank ) { 3170 KSP check_ksp; 3171 PC check_pc; 3172 Vec check_vec; 3173 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3174 KSPType check_ksp_type; 3175 3176 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3177 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 3178 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3179 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3180 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3181 if (issym) check_ksp_type = KSPCG; 3182 else check_ksp_type = KSPGMRES; 3183 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3184 } else { 3185 check_ksp_type = KSPPREONLY; 3186 } 3187 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3188 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3189 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3190 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3191 /* create random vec */ 3192 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3193 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3194 if (CoarseNullSpace) { 3195 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3196 } 3197 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3198 /* solve coarse problem */ 3199 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3200 if (CoarseNullSpace) { 3201 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3202 } 3203 /* check coarse problem residual error */ 3204 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3205 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3206 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3207 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3208 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3209 /* get eigenvalue estimation if inexact */ 3210 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3211 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3212 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 3214 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3215 } 3216 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3217 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3218 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3219 } 3220 if (dbg_flag) { 3221 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3222 } 3223 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3224 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3230 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3231 { 3232 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3233 PC_IS* pcis = (PC_IS*)pc->data; 3234 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3235 PetscInt i,j,coarse_size; 3236 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3237 PetscErrorCode ierr; 3238 3239 PetscFunctionBegin; 3240 /* get indices in local ordering for vertices and constraints */ 3241 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3242 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3243 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3244 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3245 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3246 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3247 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3248 3249 /* Compute global number of coarse dofs */ 3250 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3251 3252 /* check numbering */ 3253 if (pcbddc->dbg_flag) { 3254 PetscScalar coarsesum,*array; 3255 3256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3259 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3260 for (i=0;i<pcbddc->local_primal_size;i++) { 3261 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3262 } 3263 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3264 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3265 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3266 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3267 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3268 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3269 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3270 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3271 for (i=0;i<pcis->n;i++) { 3272 if (array[i] == 1.0) { 3273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3274 } 3275 } 3276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3277 for (i=0;i<pcis->n;i++) { 3278 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3279 } 3280 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3281 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3282 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3283 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3284 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3285 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3286 if (pcbddc->dbg_flag > 1) { 3287 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3290 for (i=0;i<pcbddc->local_primal_size;i++) { 3291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3292 } 3293 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3294 } 3295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3296 } 3297 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3298 /* get back data */ 3299 *coarse_size_n = coarse_size; 3300 *local_primal_indices_n = local_primal_indices; 3301 PetscFunctionReturn(0); 3302 } 3303 3304