1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 105 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 107 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 110 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 113 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 114 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 116 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCBDDCCreateWorkVectors" 122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 123 { 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PC_IS *pcis = (PC_IS*)pc->data; 126 VecType impVecType; 127 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 132 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 133 local_primal_size = n_constraints+n_vertices; 134 n_R = pcis->n-n_vertices; 135 /* local work vectors */ 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 138 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 139 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 140 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 141 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 145 if (n_constraints) { 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 149 } 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 156 { 157 PetscErrorCode ierr; 158 /* pointers to pcis and pcbddc */ 159 PC_IS* pcis = (PC_IS*)pc->data; 160 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 161 /* submatrices of local problem */ 162 Mat A_RV,A_VR,A_VV; 163 /* working matrices */ 164 Mat M1,M2,M3,C_CR; 165 /* working vectors */ 166 Vec vec1_C,vec2_C,vec1_V,vec2_V; 167 /* additional working stuff */ 168 IS is_aux; 169 ISLocalToGlobalMapping BtoNmap; 170 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 171 const PetscScalar *array,*row_cmat_values; 172 const PetscInt *row_cmat_indices,*idx_R_local; 173 PetscInt *vertices,*idx_V_B,*auxindices; 174 PetscInt n_vertices,n_constraints,size_of_constraint; 175 PetscInt i,j,n_R,n_D,n_B; 176 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 177 /* Vector and matrix types */ 178 VecType impVecType; 179 MatType impMatType; 180 /* some shortcuts to scalars */ 181 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 182 /* for debugging purposes */ 183 PetscReal *coarsefunctions_errors,*constraints_errors; 184 185 PetscFunctionBegin; 186 /* get number of vertices and their local indices */ 187 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 188 n_constraints = pcbddc->local_primal_size-n_vertices; 189 /* Set Non-overlapping dimensions */ 190 n_B = pcis->n_B; n_D = pcis->n - n_B; 191 n_R = pcis->n-n_vertices; 192 193 /* Set types for local objects needed by BDDC precondtioner */ 194 impMatType = MATSEQDENSE; 195 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 196 197 /* Allocating some extra storage just to be safe */ 198 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 199 for (i=0;i<pcis->n;i++) auxindices[i]=i; 200 201 /* vertices in boundary numbering */ 202 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 204 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 206 if (i != n_vertices) { 207 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 208 } 209 210 /* some work vectors on vertices and/or constraints */ 211 if (n_vertices) { 212 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 213 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 214 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 215 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 216 } 217 if (n_constraints) { 218 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 219 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 220 } 221 222 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 223 if (n_constraints) { 224 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 225 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 226 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 227 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 228 229 /* Extract constraints on R nodes: C_{CR} */ 230 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 231 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 232 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 233 234 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 235 for (i=0;i<n_constraints;i++) { 236 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 237 /* Get row of constraint matrix in R numbering */ 238 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 239 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 240 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 241 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 242 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 243 /* Solve for row of constraint matrix in R numbering */ 244 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 245 /* Set values in local_auxmat2 */ 246 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 247 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 248 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 249 } 250 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 253 254 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 255 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 256 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 257 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 258 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 259 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 260 ierr = MatSetUp(M1);CHKERRQ(ierr); 261 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 262 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 263 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 264 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 266 ierr = MatDestroy(&M2);CHKERRQ(ierr); 267 ierr = MatDestroy(&M3);CHKERRQ(ierr); 268 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 269 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 270 } 271 272 /* Get submatrices from subdomain matrix */ 273 if (n_vertices) { 274 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 275 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 277 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 278 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 279 } 280 281 /* Matrix of coarse basis functions (local) */ 282 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 283 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 284 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 285 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 286 if (pcbddc->switch_static || pcbddc->dbg_flag) { 287 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 288 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 289 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 290 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 291 } 292 293 if (pcbddc->dbg_flag) { 294 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 295 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 296 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 297 } 298 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 299 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 300 301 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 302 303 /* vertices */ 304 for (i=0;i<n_vertices;i++) { 305 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 306 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 307 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 308 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 309 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 310 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 311 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 313 if (n_constraints) { 314 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 315 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 316 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 317 } 318 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 319 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 320 321 /* Set values in coarse basis function and subdomain part of coarse_mat */ 322 /* coarse basis functions */ 323 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 324 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 325 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 326 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 327 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 328 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 329 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 330 if (pcbddc->switch_static || pcbddc->dbg_flag) { 331 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 333 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 334 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 336 } 337 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 338 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 339 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 341 if (n_constraints) { 342 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 343 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 345 } 346 347 /* check */ 348 if (pcbddc->dbg_flag) { 349 /* assemble subdomain vector on local nodes */ 350 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 351 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 352 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 354 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 355 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 356 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 357 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 358 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 359 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 360 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 362 if (n_constraints) { 363 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 364 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 365 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 366 } 367 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 368 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 369 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 370 /* check saddle point solution */ 371 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 372 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 373 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 374 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 375 /* shift by the identity matrix */ 376 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 380 } 381 } 382 383 /* constraints */ 384 for (i=0;i<n_constraints;i++) { 385 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 386 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 387 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 388 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 389 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 390 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 391 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 392 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 393 if (n_vertices) { 394 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 395 } 396 /* Set values in coarse basis function and subdomain part of coarse_mat */ 397 /* coarse basis functions */ 398 j = i+n_vertices; /* don't touch this! */ 399 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 400 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 405 if (pcbddc->switch_static || pcbddc->dbg_flag) { 406 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 411 } 412 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 413 if (n_vertices) { 414 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 417 } 418 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 421 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 430 /* assemble subdomain vector of lagrange multipliers */ 431 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 432 if (n_vertices) { 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 } 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 443 /* check saddle point solution */ 444 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 447 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 448 /* shift by the identity matrix */ 449 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 450 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 453 } 454 } 455 /* call assembling routines for local coarse basis */ 456 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 if (pcbddc->switch_static || pcbddc->dbg_flag) { 459 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 460 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 461 } 462 463 /* compute other basis functions for non-symmetric problems */ 464 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 465 if (!setsym || (setsym && !issym)) { 466 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 467 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 468 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 469 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 470 if (pcbddc->switch_static || pcbddc->dbg_flag) { 471 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 472 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 473 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 474 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 475 } 476 for (i=0;i<pcbddc->local_primal_size;i++) { 477 if (n_constraints) { 478 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 479 for (j=0;j<n_constraints;j++) { 480 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 481 } 482 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 483 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 484 } 485 if (i<n_vertices) { 486 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 487 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 488 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 489 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 490 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 491 if (n_constraints) { 492 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 493 } 494 } else { 495 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 496 } 497 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 498 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 502 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 504 if (i<n_vertices) { 505 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 506 } 507 if (pcbddc->switch_static || pcbddc->dbg_flag) { 508 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 513 } 514 515 if (pcbddc->dbg_flag) { 516 /* assemble subdomain vector on nodes */ 517 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 518 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 519 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 520 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 521 if (i<n_vertices) { 522 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 523 } 524 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 526 /* assemble subdomain vector of lagrange multipliers */ 527 for (j=0;j<pcbddc->local_primal_size;j++) { 528 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 529 } 530 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 532 /* check saddle point solution */ 533 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 534 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 535 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 536 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 537 /* shift by the identity matrix */ 538 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 542 } 543 } 544 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 if (pcbddc->switch_static || pcbddc->dbg_flag) { 547 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 } 550 } 551 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 552 /* Checking coarse_sub_mat and coarse basis functios */ 553 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 554 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 555 if (pcbddc->dbg_flag) { 556 Mat coarse_sub_mat; 557 Mat AUXMAT,TM1,TM2,TM3,TM4; 558 Mat coarse_phi_D,coarse_phi_B; 559 Mat coarse_psi_D,coarse_psi_B; 560 Mat A_II,A_BB,A_IB,A_BI; 561 MatType checkmattype=MATSEQAIJ; 562 PetscReal real_value; 563 564 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 565 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 566 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 567 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 569 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 570 if (pcbddc->coarse_psi_B) { 571 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 572 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 573 } 574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 575 576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 579 if (pcbddc->coarse_psi_B) { 580 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 581 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 582 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 583 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 584 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 585 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 586 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 } else { 593 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 594 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 595 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 } 602 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 604 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 605 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 606 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 607 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 612 for (i=0;i<pcbddc->local_primal_size;i++) { 613 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 614 } 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 616 for (i=0;i<pcbddc->local_primal_size;i++) { 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 618 } 619 if (pcbddc->coarse_psi_B) { 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 621 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 623 } 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 625 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 627 } 628 } 629 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 630 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 631 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 632 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 633 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 634 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 635 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 636 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 637 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 639 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 640 if (pcbddc->coarse_psi_B) { 641 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 642 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 643 } 644 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 646 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 647 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 648 } 649 /* free memory */ 650 if (n_vertices) { 651 ierr = PetscFree(vertices);CHKERRQ(ierr); 652 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 653 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 654 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 655 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 656 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 657 } 658 if (n_constraints) { 659 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 660 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 661 ierr = MatDestroy(&M1);CHKERRQ(ierr); 662 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 663 } 664 ierr = PetscFree(auxindices);CHKERRQ(ierr); 665 /* get back data */ 666 *coarse_submat_vals_n = coarse_submat_vals; 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 673 { 674 PC_IS* pcis = (PC_IS*)(pc->data); 675 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 676 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 677 /* manage repeated solves */ 678 MatReuse reuse; 679 MatStructure matstruct; 680 PetscErrorCode ierr; 681 682 PetscFunctionBegin; 683 /* get mat flags */ 684 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 685 reuse = MAT_INITIAL_MATRIX; 686 if (pc->setupcalled) { 687 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 688 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 689 if (matstruct == SAME_NONZERO_PATTERN) { 690 reuse = MAT_REUSE_MATRIX; 691 } else { 692 reuse = MAT_INITIAL_MATRIX; 693 } 694 } 695 if (reuse == MAT_INITIAL_MATRIX) { 696 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 701 } 702 703 /* transform local matrices if needed */ 704 if (pcbddc->use_change_of_basis) { 705 Mat change_mat_all; 706 PetscScalar *row_cmat_values; 707 PetscInt *row_cmat_indices; 708 PetscInt *nnz,*is_indices,*temp_indices; 709 PetscInt i,j,k,n_D,n_B; 710 711 /* Get Non-overlapping dimensions */ 712 n_B = pcis->n_B; 713 n_D = pcis->n-n_B; 714 715 /* compute nonzero structure of change of basis on all local nodes */ 716 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 717 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 718 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 719 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 720 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 721 k=1; 722 for (i=0;i<n_B;i++) { 723 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 724 nnz[is_indices[i]]=j; 725 if (k < j) k = j; 726 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 727 } 728 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 /* assemble change of basis matrix on the whole set of local dofs */ 730 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 731 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 732 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 733 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 734 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 736 for (i=0;i<n_D;i++) { 737 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 738 } 739 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 743 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 744 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 746 } 747 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 748 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 749 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 750 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 751 if (i==1) { 752 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 753 } else { 754 Mat work_mat; 755 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 756 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 757 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 758 } 759 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 760 ierr = PetscFree(nnz);CHKERRQ(ierr); 761 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 762 } else { 763 /* without change of basis, the local matrix is unchanged */ 764 if (!pcbddc->local_mat) { 765 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 766 pcbddc->local_mat = matis->A; 767 } 768 } 769 770 /* get submatrices */ 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 781 { 782 PC_IS* pcis = (PC_IS*)(pc->data); 783 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 784 IS is_aux1,is_aux2; 785 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 786 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 787 PetscBT bitmask; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* Set Non-overlapping dimensions */ 792 n_B = pcis->n_B; n_D = pcis->n - n_B; 793 /* get vertex indices from constraint matrix */ 794 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 795 /* Set number of constraints */ 796 n_constraints = pcbddc->local_primal_size-n_vertices; 797 /* create auxiliary bitmask */ 798 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 799 for (i=0;i<n_vertices;i++) { 800 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 801 } 802 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 803 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 804 for (i=0, n_R=0; i<pcis->n; i++) { 805 if (!PetscBTLookup(bitmask,i)) { 806 idx_R_local[n_R] = i; 807 n_R++; 808 } 809 } 810 ierr = PetscFree(vertices);CHKERRQ(ierr); 811 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 812 813 /* print some info if requested */ 814 if (pcbddc->dbg_flag) { 815 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 821 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 822 } 823 824 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 825 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 826 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 827 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 828 for (i=0; i<n_D; i++) { 829 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 830 } 831 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 832 for (i=0, j=0; i<n_R; i++) { 833 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 834 aux_array1[j++] = i; 835 } 836 } 837 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 838 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 839 for (i=0, j=0; i<n_B; i++) { 840 if (!PetscBTLookup(bitmask,is_indices[i])) { 841 aux_array2[j++] = i; 842 } 843 } 844 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 848 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 849 850 if (pcbddc->switch_static || pcbddc->dbg_flag) { 851 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 852 for (i=0, j=0; i<n_R; i++) { 853 if (PetscBTLookup(bitmask,idx_R_local[i])) { 854 aux_array1[j++] = i; 855 } 856 } 857 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 858 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 859 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 860 } 861 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 868 { 869 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 870 871 PetscFunctionBegin; 872 pcbddc->use_exact_dirichlet=use; 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 879 { 880 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 881 PC_IS *pcis = (PC_IS*)pc->data; 882 PC pc_temp; 883 Mat A_RR; 884 Vec vec1,vec2,vec3; 885 MatStructure matstruct; 886 PetscScalar m_one = -1.0; 887 PetscReal value; 888 PetscInt n_D,n_R,use_exact,use_exact_reduced; 889 PetscErrorCode ierr; 890 891 PetscFunctionBegin; 892 /* Creating PC contexts for local Dirichlet and Neumann problems */ 893 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 894 895 /* DIRICHLET PROBLEM */ 896 /* Matrix for Dirichlet problem is pcis->A_II */ 897 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 898 if (!pcbddc->ksp_D) { /* create object if not yet build */ 899 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 900 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 901 /* default */ 902 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 903 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 904 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 905 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 906 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 907 } 908 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 909 /* Allow user's customization */ 910 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 911 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 912 if (!n_D) { 913 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 914 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 915 } 916 /* Set Up KSP for Dirichlet problem of BDDC */ 917 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 918 /* set ksp_D into pcis data */ 919 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 921 pcis->ksp_D = pcbddc->ksp_D; 922 923 /* NEUMANN PROBLEM */ 924 /* Matrix for Neumann problem is A_RR -> we need to create it */ 925 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 926 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 927 if (!pcbddc->ksp_R) { /* create object if not yet build */ 928 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 929 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 930 /* default */ 931 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 932 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 933 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 934 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 935 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 936 } 937 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 938 /* Allow user's customization */ 939 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 940 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 941 if (!n_R) { 942 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 943 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 944 } 945 /* Set Up KSP for Neumann problem of BDDC */ 946 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 947 948 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 949 950 /* Dirichlet */ 951 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 952 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 953 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 954 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 955 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 956 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 957 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 958 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 959 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 960 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 961 /* need to be adapted? */ 962 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 963 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 964 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 965 /* print info */ 966 if (pcbddc->dbg_flag) { 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 969 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 972 } 973 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 974 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 975 } 976 977 /* Neumann */ 978 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 979 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 980 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 981 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 982 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 983 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 984 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 985 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 986 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 987 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 988 /* need to be adapted? */ 989 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 990 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 991 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 992 /* print info */ 993 if (pcbddc->dbg_flag) { 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 996 } 997 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 998 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 999 } 1000 1001 /* free Neumann problem's matrix */ 1002 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1008 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1009 { 1010 PetscErrorCode ierr; 1011 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1012 1013 PetscFunctionBegin; 1014 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1015 if (pcbddc->local_auxmat1) { 1016 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1017 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1018 } 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1024 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1025 { 1026 PetscErrorCode ierr; 1027 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1028 PC_IS* pcis = (PC_IS*) (pc->data); 1029 const PetscScalar zero = 0.0; 1030 1031 PetscFunctionBegin; 1032 /* Application of PHI^T (or PSI^T) */ 1033 if (pcbddc->coarse_psi_B) { 1034 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1035 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1036 } else { 1037 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1038 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1039 } 1040 /* Scatter data of coarse_rhs */ 1041 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1042 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 1044 /* Local solution on R nodes */ 1045 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1048 if (pcbddc->switch_static) { 1049 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1050 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 } 1052 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1053 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1054 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 if (pcbddc->switch_static) { 1057 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 } 1060 1061 /* Coarse solution */ 1062 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1064 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1067 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 1069 /* Sum contributions from two levels */ 1070 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1071 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1078 { 1079 PetscErrorCode ierr; 1080 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1081 1082 PetscFunctionBegin; 1083 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1089 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1090 { 1091 PetscErrorCode ierr; 1092 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1093 1094 PetscFunctionBegin; 1095 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* uncomment for testing purposes */ 1100 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1103 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1104 { 1105 PetscErrorCode ierr; 1106 PC_IS* pcis = (PC_IS*)(pc->data); 1107 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1108 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1109 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1110 MatType impMatType=MATSEQAIJ; 1111 /* one and zero */ 1112 PetscScalar one=1.0,zero=0.0; 1113 /* space to store constraints and their local indices */ 1114 PetscScalar *temp_quadrature_constraint; 1115 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1116 /* iterators */ 1117 PetscInt i,j,k,total_counts,temp_start_ptr; 1118 /* stuff to store connected components stored in pcbddc->mat_graph */ 1119 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1120 PetscInt n_ISForFaces,n_ISForEdges; 1121 /* near null space stuff */ 1122 MatNullSpace nearnullsp; 1123 const Vec *nearnullvecs; 1124 Vec *localnearnullsp; 1125 PetscBool nnsp_has_cnst; 1126 PetscInt nnsp_size; 1127 PetscScalar *array; 1128 /* BLAS integers */ 1129 PetscBLASInt lwork,lierr; 1130 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1131 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1132 /* LAPACK working arrays for SVD or POD */ 1133 PetscBool skip_lapack; 1134 PetscScalar *work; 1135 PetscReal *singular_vals; 1136 #if defined(PETSC_USE_COMPLEX) 1137 PetscReal *rwork; 1138 #endif 1139 #if defined(PETSC_MISSING_LAPACK_GESVD) 1140 PetscBLASInt Blas_one_2=1; 1141 PetscScalar *temp_basis,*correlation_mat; 1142 #else 1143 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1144 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1145 #endif 1146 /* change of basis */ 1147 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1148 PetscBool boolforchange; 1149 PetscBT touched,change_basis; 1150 /* auxiliary stuff */ 1151 PetscInt *nnz,*is_indices,*local_to_B; 1152 /* some quantities */ 1153 PetscInt n_vertices,total_primal_vertices; 1154 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1155 1156 1157 PetscFunctionBegin; 1158 /* Get index sets for faces, edges and vertices from graph */ 1159 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1160 pcbddc->use_vertices = PETSC_TRUE; 1161 } 1162 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1163 /* print some info */ 1164 if (pcbddc->dbg_flag) { 1165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1166 i = 0; 1167 if (ISForVertices) { 1168 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1169 } 1170 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1172 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1174 } 1175 /* check if near null space is attached to global mat */ 1176 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1177 if (nearnullsp) { 1178 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1179 } else { /* if near null space is not provided BDDC uses constants by default */ 1180 nnsp_size = 0; 1181 nnsp_has_cnst = PETSC_TRUE; 1182 } 1183 /* get max number of constraints on a single cc */ 1184 max_constraints = nnsp_size; 1185 if (nnsp_has_cnst) max_constraints++; 1186 1187 /* 1188 Evaluate maximum storage size needed by the procedure 1189 - temp_indices will contain start index of each constraint stored as follows 1190 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1191 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1192 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1193 */ 1194 total_counts = n_ISForFaces+n_ISForEdges; 1195 total_counts *= max_constraints; 1196 n_vertices = 0; 1197 if (ISForVertices) { 1198 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1199 } 1200 total_counts += n_vertices; 1201 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1202 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1203 total_counts = 0; 1204 max_size_of_constraint = 0; 1205 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1206 if (i<n_ISForEdges) { 1207 used_IS = &ISForEdges[i]; 1208 } else { 1209 used_IS = &ISForFaces[i-n_ISForEdges]; 1210 } 1211 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1212 total_counts += j; 1213 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1214 } 1215 total_counts *= max_constraints; 1216 total_counts += n_vertices; 1217 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1218 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1219 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1220 /* local to boundary numbering */ 1221 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1222 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1223 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1224 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1225 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1226 /* get local part of global near null space vectors */ 1227 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1228 for (k=0;k<nnsp_size;k++) { 1229 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1230 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 } 1233 1234 /* whether or not to skip lapack calls */ 1235 skip_lapack = PETSC_TRUE; 1236 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1237 1238 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1239 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1240 PetscScalar temp_work; 1241 #if defined(PETSC_MISSING_LAPACK_GESVD) 1242 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1243 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1244 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1245 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1246 #if defined(PETSC_USE_COMPLEX) 1247 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1248 #endif 1249 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1250 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1251 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1252 lwork = -1; 1253 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1254 #if !defined(PETSC_USE_COMPLEX) 1255 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1256 #else 1257 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1258 #endif 1259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1260 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1261 #else /* on missing GESVD */ 1262 /* SVD */ 1263 PetscInt max_n,min_n; 1264 max_n = max_size_of_constraint; 1265 min_n = max_constraints; 1266 if (max_size_of_constraint < max_constraints) { 1267 min_n = max_size_of_constraint; 1268 max_n = max_constraints; 1269 } 1270 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1271 #if defined(PETSC_USE_COMPLEX) 1272 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1273 #endif 1274 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1275 lwork = -1; 1276 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1277 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1278 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1280 #if !defined(PETSC_USE_COMPLEX) 1281 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1282 #else 1283 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1284 #endif 1285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1286 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1287 #endif /* on missing GESVD */ 1288 /* Allocate optimal workspace */ 1289 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1290 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1291 } 1292 /* Now we can loop on constraining sets */ 1293 total_counts = 0; 1294 temp_indices[0] = 0; 1295 /* vertices */ 1296 if (ISForVertices) { 1297 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 if (nnsp_has_cnst) { /* consider all vertices */ 1299 for (i=0;i<n_vertices;i++) { 1300 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1301 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1302 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1303 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1304 total_counts++; 1305 } 1306 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1307 PetscBool used_vertex; 1308 for (i=0;i<n_vertices;i++) { 1309 used_vertex = PETSC_FALSE; 1310 k = 0; 1311 while (!used_vertex && k<nnsp_size) { 1312 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1313 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1314 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1315 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1316 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1317 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1318 total_counts++; 1319 used_vertex = PETSC_TRUE; 1320 } 1321 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1322 k++; 1323 } 1324 } 1325 } 1326 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1327 n_vertices = total_counts; 1328 } 1329 1330 /* edges and faces */ 1331 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1332 if (i<n_ISForEdges) { 1333 used_IS = &ISForEdges[i]; 1334 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1335 } else { 1336 used_IS = &ISForFaces[i-n_ISForEdges]; 1337 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1338 } 1339 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1340 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1341 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1342 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1343 /* change of basis should not be performed on local periodic nodes */ 1344 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1345 if (nnsp_has_cnst) { 1346 PetscScalar quad_value; 1347 temp_constraints++; 1348 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1349 for (j=0;j<size_of_constraint;j++) { 1350 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1351 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1352 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1353 } 1354 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1355 if (boolforchange) { 1356 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1357 } 1358 total_counts++; 1359 } 1360 for (k=0;k<nnsp_size;k++) { 1361 PetscReal real_value; 1362 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1363 for (j=0;j<size_of_constraint;j++) { 1364 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1365 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1366 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1367 } 1368 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1369 /* check if array is null on the connected component */ 1370 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1371 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1372 if (real_value > 0.0) { /* keep indices and values */ 1373 temp_constraints++; 1374 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1375 if (boolforchange) { 1376 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1377 } 1378 total_counts++; 1379 } 1380 } 1381 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1382 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1383 if (!pcbddc->use_nnsp_true) { 1384 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1385 1386 #if defined(PETSC_MISSING_LAPACK_GESVD) 1387 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1388 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1389 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1390 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1391 from that computed using LAPACKgesvd 1392 -> This is due to a different computation of eigenvectors in LAPACKheev 1393 -> The quality of the POD-computed basis will be the same */ 1394 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1395 /* Store upper triangular part of correlation matrix */ 1396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1397 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1398 for (j=0;j<temp_constraints;j++) { 1399 for (k=0;k<j+1;k++) { 1400 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1401 } 1402 } 1403 /* compute eigenvalues and eigenvectors of correlation matrix */ 1404 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1405 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1406 #if !defined(PETSC_USE_COMPLEX) 1407 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1408 #else 1409 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1410 #endif 1411 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1412 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1413 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1414 j=0; 1415 while (j < temp_constraints && singular_vals[j] < tol) j++; 1416 total_counts=total_counts-j; 1417 /* scale and copy POD basis into used quadrature memory */ 1418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1419 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1420 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1421 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1422 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1423 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1424 if (j<temp_constraints) { 1425 PetscInt ii; 1426 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1428 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1429 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1430 for (k=0;k<temp_constraints-j;k++) { 1431 for (ii=0;ii<size_of_constraint;ii++) { 1432 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1433 } 1434 } 1435 } 1436 #else /* on missing GESVD */ 1437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1438 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1440 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1441 #if !defined(PETSC_USE_COMPLEX) 1442 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1443 #else 1444 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1445 #endif 1446 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1447 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1448 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1449 k = temp_constraints; 1450 if (k > size_of_constraint) k = size_of_constraint; 1451 j = 0; 1452 while (j < k && singular_vals[k-j-1] < tol) j++; 1453 total_counts = total_counts-temp_constraints+k-j; 1454 #endif /* on missing GESVD */ 1455 } 1456 } 1457 /* free index sets of faces, edges and vertices */ 1458 for (i=0;i<n_ISForFaces;i++) { 1459 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1460 } 1461 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1462 for (i=0;i<n_ISForEdges;i++) { 1463 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1464 } 1465 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1466 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1467 1468 /* free workspace */ 1469 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1470 ierr = PetscFree(work);CHKERRQ(ierr); 1471 #if defined(PETSC_USE_COMPLEX) 1472 ierr = PetscFree(rwork);CHKERRQ(ierr); 1473 #endif 1474 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1475 #if defined(PETSC_MISSING_LAPACK_GESVD) 1476 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1477 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1478 #endif 1479 } 1480 for (k=0;k<nnsp_size;k++) { 1481 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1482 } 1483 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1484 1485 /* set quantities in pcbddc data structure */ 1486 /* n_vertices defines the number of subdomain corners in the primal space */ 1487 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1488 pcbddc->local_primal_size = total_counts; 1489 pcbddc->n_vertices = n_vertices; 1490 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1491 1492 /* Create constraint matrix */ 1493 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1494 /* so we need to set it up properly either with or without change of basis */ 1495 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1496 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1497 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1498 /* array to compute a local numbering of constraints : vertices first then constraints */ 1499 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1500 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1501 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1502 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1503 /* auxiliary stuff for basis change */ 1504 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1505 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1506 1507 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1508 total_primal_vertices=0; 1509 for (i=0;i<pcbddc->local_primal_size;i++) { 1510 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1511 if (size_of_constraint == 1) { 1512 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1513 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1514 aux_primal_minloc[total_primal_vertices]=0; 1515 total_primal_vertices++; 1516 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1517 PetscInt min_loc,min_index; 1518 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1519 /* find first untouched local node */ 1520 k = 0; 1521 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1522 min_index = global_indices[k]; 1523 min_loc = k; 1524 /* search the minimum among global nodes already untouched on the cc */ 1525 for (k=1;k<size_of_constraint;k++) { 1526 /* there can be more than one constraint on a single connected component */ 1527 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1528 min_index = global_indices[k]; 1529 min_loc = k; 1530 } 1531 } 1532 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1533 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1534 aux_primal_minloc[total_primal_vertices]=min_loc; 1535 total_primal_vertices++; 1536 } 1537 } 1538 /* free workspace */ 1539 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1540 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1541 /* permute indices in order to have a sorted set of vertices */ 1542 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1543 1544 /* nonzero structure of constraint matrix */ 1545 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1546 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1547 j=total_primal_vertices; 1548 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1549 if (!PetscBTLookup(change_basis,i)) { 1550 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1551 j++; 1552 } 1553 } 1554 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1555 ierr = PetscFree(nnz);CHKERRQ(ierr); 1556 /* set values in constraint matrix */ 1557 for (i=0;i<total_primal_vertices;i++) { 1558 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1559 } 1560 total_counts = total_primal_vertices; 1561 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1562 if (!PetscBTLookup(change_basis,i)) { 1563 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1564 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1565 total_counts++; 1566 } 1567 } 1568 /* assembling */ 1569 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1570 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1571 /* 1572 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1573 */ 1574 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1575 if (pcbddc->use_change_of_basis) { 1576 PetscBool qr_needed = PETSC_FALSE; 1577 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1578 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1579 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1580 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1581 /* work arrays */ 1582 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1583 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1584 /* nonzeros per row */ 1585 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1586 if (PetscBTLookup(change_basis,i)) { 1587 qr_needed = PETSC_TRUE; 1588 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1589 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1590 } 1591 } 1592 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1593 ierr = PetscFree(nnz);CHKERRQ(ierr); 1594 /* Set initial identity in the matrix */ 1595 for (i=0;i<pcis->n_B;i++) { 1596 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1597 } 1598 1599 /* Now we loop on the constraints which need a change of basis */ 1600 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1601 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1602 /* Change of basis matrix T computed via QR decomposition of constraints */ 1603 if (qr_needed) { 1604 /* dual and primal dofs on a single cc */ 1605 PetscInt dual_dofs,primal_dofs; 1606 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1607 PetscInt primal_counter; 1608 /* working stuff for GEQRF */ 1609 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1610 PetscBLASInt lqr_work; 1611 /* working stuff for UNGQR */ 1612 PetscScalar *gqr_work,lgqr_work_t; 1613 PetscBLASInt lgqr_work; 1614 /* working stuff for TRTRS */ 1615 PetscScalar *trs_rhs; 1616 PetscBLASInt Blas_NRHS; 1617 /* pointers for values insertion into change of basis matrix */ 1618 PetscInt *start_rows,*start_cols; 1619 PetscScalar *start_vals; 1620 /* working stuff for values insertion */ 1621 PetscBT is_primal; 1622 1623 /* space to store Q */ 1624 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1625 /* first we issue queries for optimal work */ 1626 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1628 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1629 lqr_work = -1; 1630 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1631 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1632 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1633 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1634 lgqr_work = -1; 1635 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1637 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1638 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1639 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1640 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1641 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1642 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1643 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1644 /* array to store scaling factors for reflectors */ 1645 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1646 /* array to store rhs and solution of triangular solver */ 1647 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1648 /* array to store whether a node is primal or not */ 1649 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1650 for (i=0;i<total_primal_vertices;i++) { 1651 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1652 } 1653 1654 /* allocating workspace for check */ 1655 if (pcbddc->dbg_flag) { 1656 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1657 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1658 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1659 } 1660 1661 /* loop on constraints and see whether or not they need a change of basis */ 1662 /* -> using implicit ordering contained in temp_indices data */ 1663 total_counts = pcbddc->n_vertices; 1664 primal_counter = total_counts; 1665 while (total_counts<pcbddc->local_primal_size) { 1666 primal_dofs = 1; 1667 if (PetscBTLookup(change_basis,total_counts)) { 1668 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1669 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1670 primal_dofs++; 1671 } 1672 /* get constraint info */ 1673 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1674 dual_dofs = size_of_constraint-primal_dofs; 1675 1676 /* copy quadrature constraints for change of basis check */ 1677 if (pcbddc->dbg_flag) { 1678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1679 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1680 } 1681 1682 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1683 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1684 1685 /* compute QR decomposition of constraints */ 1686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1689 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1690 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1691 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1692 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1693 1694 /* explictly compute R^-T */ 1695 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1696 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1697 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1698 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1700 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1701 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1702 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1703 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1704 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1705 1706 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1709 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1712 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1713 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1715 1716 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1717 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1718 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1719 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1720 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1721 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1726 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1727 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1728 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1729 1730 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1731 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1732 /* insert cols for primal dofs */ 1733 for (j=0;j<primal_dofs;j++) { 1734 start_vals = &qr_basis[j*size_of_constraint]; 1735 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1736 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1737 } 1738 /* insert cols for dual dofs */ 1739 for (j=0,k=0;j<dual_dofs;k++) { 1740 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1741 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1742 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1743 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1744 j++; 1745 } 1746 } 1747 1748 /* check change of basis */ 1749 if (pcbddc->dbg_flag) { 1750 PetscInt ii,jj; 1751 PetscBool valid_qr=PETSC_TRUE; 1752 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1753 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1757 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1758 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1759 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1760 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1761 for (jj=0;jj<size_of_constraint;jj++) { 1762 for (ii=0;ii<primal_dofs;ii++) { 1763 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1764 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1765 } 1766 } 1767 if (!valid_qr) { 1768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1769 for (jj=0;jj<size_of_constraint;jj++) { 1770 for (ii=0;ii<primal_dofs;ii++) { 1771 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1772 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1773 } 1774 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1775 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1776 } 1777 } 1778 } 1779 } else { 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1781 } 1782 } 1783 /* increment primal counter */ 1784 primal_counter += primal_dofs; 1785 } else { 1786 if (pcbddc->dbg_flag) { 1787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1788 } 1789 } 1790 /* increment constraint counter total_counts */ 1791 total_counts += primal_dofs; 1792 } 1793 if (pcbddc->dbg_flag) { 1794 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1795 ierr = PetscFree(work);CHKERRQ(ierr); 1796 } 1797 /* free workspace */ 1798 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1799 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1800 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1801 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1802 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1803 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1804 } 1805 /* assembling */ 1806 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1807 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1808 /* 1809 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1810 */ 1811 } 1812 /* free workspace */ 1813 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1814 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1815 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1816 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1817 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1818 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1819 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1820 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1826 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1827 { 1828 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1829 PC_IS *pcis = (PC_IS*)pc->data; 1830 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1831 PetscInt bs,ierr,i,vertex_size; 1832 PetscViewer viewer=pcbddc->dbg_viewer; 1833 1834 PetscFunctionBegin; 1835 /* Init local Graph struct */ 1836 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1837 1838 /* Check validity of the csr graph passed in by the user */ 1839 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1840 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1841 } 1842 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1843 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1844 Mat mat_adj; 1845 const PetscInt *xadj,*adjncy; 1846 PetscBool flg_row=PETSC_TRUE; 1847 1848 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1849 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1850 if (!flg_row) { 1851 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1852 } 1853 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1854 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1855 if (!flg_row) { 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1857 } 1858 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1859 } 1860 1861 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1862 vertex_size = 1; 1863 if (!pcbddc->n_ISForDofs) { 1864 IS *custom_ISForDofs; 1865 1866 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1867 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1868 for (i=0;i<bs;i++) { 1869 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1870 } 1871 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1872 /* remove my references to IS objects */ 1873 for (i=0;i<bs;i++) { 1874 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1875 } 1876 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1877 } else { /* mat block size as vertex size (used for elasticity) */ 1878 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1879 } 1880 1881 /* Setup of Graph */ 1882 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1883 1884 /* Graph's connected components analysis */ 1885 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1886 1887 /* print some info to stdout */ 1888 if (pcbddc->dbg_flag) { 1889 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1896 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1897 { 1898 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1899 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1900 PetscErrorCode ierr; 1901 1902 PetscFunctionBegin; 1903 n = 0; 1904 vertices = 0; 1905 if (pcbddc->ConstraintMatrix) { 1906 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1907 for (i=0;i<local_primal_size;i++) { 1908 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1909 if (size_of_constraint == 1) n++; 1910 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1911 } 1912 if (vertices_idx) { 1913 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1914 n = 0; 1915 for (i=0;i<local_primal_size;i++) { 1916 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1917 if (size_of_constraint == 1) { 1918 vertices[n++]=row_cmat_indices[0]; 1919 } 1920 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1921 } 1922 } 1923 } 1924 *n_vertices = n; 1925 if (vertices_idx) *vertices_idx = vertices; 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1931 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1932 { 1933 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1934 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1935 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1936 PetscBT touched; 1937 PetscErrorCode ierr; 1938 1939 /* This function assumes that the number of local constraints per connected component 1940 is not greater than the number of nodes defined for the connected component 1941 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1942 PetscFunctionBegin; 1943 n = 0; 1944 constraints_index = 0; 1945 if (pcbddc->ConstraintMatrix) { 1946 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1947 max_size_of_constraint = 0; 1948 for (i=0;i<local_primal_size;i++) { 1949 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1950 if (size_of_constraint > 1) { 1951 n++; 1952 } 1953 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1954 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1955 } 1956 if (constraints_idx) { 1957 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1958 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1959 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1960 n = 0; 1961 for (i=0;i<local_primal_size;i++) { 1962 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1963 if (size_of_constraint > 1) { 1964 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1965 /* find first untouched local node */ 1966 j = 0; 1967 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1968 min_index = row_cmat_global_indices[j]; 1969 min_loc = j; 1970 /* search the minimum among nodes not yet touched on the connected component 1971 since there can be more than one constraint on a single cc */ 1972 for (j=1;j<size_of_constraint;j++) { 1973 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1974 min_index = row_cmat_global_indices[j]; 1975 min_loc = j; 1976 } 1977 } 1978 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1979 constraints_index[n++] = row_cmat_indices[min_loc]; 1980 } 1981 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1982 } 1983 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1984 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1985 } 1986 } 1987 *n_constraints = n; 1988 if (constraints_idx) *constraints_idx = constraints_index; 1989 PetscFunctionReturn(0); 1990 } 1991 1992 /* the next two functions has been adapted from pcis.c */ 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "PCBDDCApplySchur" 1995 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 1996 { 1997 PetscErrorCode ierr; 1998 PC_IS *pcis = (PC_IS*)(pc->data); 1999 2000 PetscFunctionBegin; 2001 if (!vec2_B) { vec2_B = v; } 2002 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2003 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2004 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2005 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2006 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2012 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2013 { 2014 PetscErrorCode ierr; 2015 PC_IS *pcis = (PC_IS*)(pc->data); 2016 2017 PetscFunctionBegin; 2018 if (!vec2_B) { vec2_B = v; } 2019 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2020 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2021 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2022 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2023 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "PCBDDCSubsetNumbering" 2029 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2030 { 2031 Vec local_vec,global_vec; 2032 IS seqis,paris; 2033 VecScatter scatter_ctx; 2034 PetscScalar *array; 2035 PetscInt *temp_global_dofs; 2036 PetscScalar globalsum; 2037 PetscInt i,j,s; 2038 PetscInt nlocals,first_index,old_index,max_local; 2039 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2040 PetscMPIInt *dof_sizes,*dof_displs; 2041 PetscBool first_found; 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 /* mpi buffers */ 2046 MPI_Comm_size(comm,&size_prec_comm); 2047 MPI_Comm_rank(comm,&rank_prec_comm); 2048 j = ( !rank_prec_comm ? size_prec_comm : 0); 2049 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2050 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2051 /* get maximum size of subset */ 2052 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2053 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2054 max_local = 0; 2055 if (n_local_dofs) { 2056 max_local = temp_global_dofs[0]; 2057 for (i=1;i<n_local_dofs;i++) { 2058 if (max_local < temp_global_dofs[i] ) { 2059 max_local = temp_global_dofs[i]; 2060 } 2061 } 2062 } 2063 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2064 max_global++; 2065 max_local = 0; 2066 if (n_local_dofs) { 2067 max_local = local_dofs[0]; 2068 for (i=1;i<n_local_dofs;i++) { 2069 if (max_local < local_dofs[i] ) { 2070 max_local = local_dofs[i]; 2071 } 2072 } 2073 } 2074 max_local++; 2075 /* allocate workspace */ 2076 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2077 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2078 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2079 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2080 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2081 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2082 /* create scatter */ 2083 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2084 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2085 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2086 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2087 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2088 /* init array */ 2089 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2090 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2091 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2092 if (local_dofs_mult) { 2093 for (i=0;i<n_local_dofs;i++) { 2094 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2095 } 2096 } else { 2097 for (i=0;i<n_local_dofs;i++) { 2098 array[local_dofs[i]]=1.0; 2099 } 2100 } 2101 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2102 /* scatter into global vec and get total number of global dofs */ 2103 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2106 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2107 /* Fill global_vec with cumulative function for global numbering */ 2108 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2109 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2110 nlocals = 0; 2111 first_index = -1; 2112 first_found = PETSC_FALSE; 2113 for (i=0;i<s;i++) { 2114 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2115 first_found = PETSC_TRUE; 2116 first_index = i; 2117 } 2118 nlocals += (PetscInt)PetscRealPart(array[i]); 2119 } 2120 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2121 if (!rank_prec_comm) { 2122 dof_displs[0]=0; 2123 for (i=1;i<size_prec_comm;i++) { 2124 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2125 } 2126 } 2127 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2128 if (first_found) { 2129 array[first_index] += (PetscScalar)nlocals; 2130 old_index = first_index; 2131 for (i=first_index+1;i<s;i++) { 2132 if (PetscRealPart(array[i]) > 0.0) { 2133 array[i] += array[old_index]; 2134 old_index = i; 2135 } 2136 } 2137 } 2138 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2139 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2140 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2141 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2142 /* get global ordering of local dofs */ 2143 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2144 if (local_dofs_mult) { 2145 for (i=0;i<n_local_dofs;i++) { 2146 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2147 } 2148 } else { 2149 for (i=0;i<n_local_dofs;i++) { 2150 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2151 } 2152 } 2153 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2154 /* free workspace */ 2155 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2156 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2157 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2158 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2159 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2160 /* return pointer to global ordering of local dofs */ 2161 *global_numbering_subset = temp_global_dofs; 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2167 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2168 { 2169 PetscInt i,j; 2170 PetscScalar *alphas; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 /* this implements stabilized Gram-Schmidt */ 2175 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2176 for (i=0;i<n;i++) { 2177 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2178 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2179 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2180 } 2181 ierr = PetscFree(alphas);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatConvert_IS_AIJ" 2188 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2189 { 2190 Mat new_mat; 2191 Mat_IS *matis = (Mat_IS*)(mat->data); 2192 /* info on mat */ 2193 PetscInt bs,rows,cols; 2194 PetscInt lrows,lcols; 2195 PetscInt local_rows,local_cols; 2196 PetscMPIInt nsubdomains; 2197 /* preallocation */ 2198 Vec vec_dnz,vec_onz; 2199 PetscScalar *my_dnz,*my_onz,*array; 2200 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2201 PetscInt index_row,index_col,owner; 2202 PetscInt *local_indices,*global_indices; 2203 /* work */ 2204 PetscInt i,j; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 /* CHECKS */ 2209 /* get info from mat */ 2210 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2211 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2212 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2213 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2214 2215 /* MAT_INITIAL_MATRIX */ 2216 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2217 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2218 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2219 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2220 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2222 2223 /* 2224 preallocation 2225 */ 2226 2227 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2228 /* 2229 Some vectors are needed to sum up properly on shared interface dofs. 2230 Note that preallocation is not exact, since it overestimates nonzeros 2231 */ 2232 /* 2233 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2234 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2235 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2236 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2237 */ 2238 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2239 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2240 /* All processes need to compute entire row ownership */ 2241 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2242 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2243 for (i=0;i<nsubdomains;i++) { 2244 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2245 row_ownership[j]=i; 2246 } 2247 } 2248 /* map indices of local mat to global values */ 2249 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2250 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2251 for (i=0;i<local_rows;i++) local_indices[i]=i; 2252 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2253 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2254 2255 /* 2256 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2257 then, they will be summed up properly. This way, preallocation is always sufficient 2258 */ 2259 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2260 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2261 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2262 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2263 for (i=0;i<local_rows;i++) { 2264 index_row = global_indices[i]; 2265 for (j=i;j<local_rows;j++) { 2266 owner = row_ownership[index_row]; 2267 index_col = global_indices[j]; 2268 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2269 my_dnz[i] += 1.0; 2270 } else { /* offdiag block */ 2271 my_onz[i] += 1.0; 2272 } 2273 /* same as before, interchanging rows and cols */ 2274 if (i != j) { 2275 owner = row_ownership[index_col]; 2276 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2277 my_dnz[j] += 1.0; 2278 } else { 2279 my_onz[j] += 1.0; 2280 } 2281 } 2282 } 2283 } 2284 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2285 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2286 if (local_rows) { /* multilevel guard */ 2287 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2288 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2289 } 2290 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2291 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2292 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2293 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2294 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2295 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2296 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2297 2298 /* set computed preallocation in dnz and onz */ 2299 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2300 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2301 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2302 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2303 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2304 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2305 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2306 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2307 2308 /* Resize preallocation if overestimated */ 2309 for (i=0;i<lrows;i++) { 2310 dnz[i] = PetscMin(dnz[i],lcols); 2311 onz[i] = PetscMin(onz[i],cols-lcols); 2312 } 2313 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2314 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2315 2316 /* 2317 Set values. Very Basic. 2318 */ 2319 for (i=0;i<local_rows;i++) { 2320 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2321 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2322 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2323 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2324 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2325 } 2326 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2327 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2328 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2329 2330 /* get back mat */ 2331 *M = new_mat; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatISSubassemble_Private" 2337 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2338 { 2339 Mat subdomain_adj; 2340 IS new_ranks,ranks_send_to; 2341 MatPartitioning partitioner; 2342 Mat_IS *matis; 2343 PetscInt n_neighs,*neighs,*n_shared,**shared; 2344 PetscInt prank; 2345 PetscMPIInt size,rank,color; 2346 PetscInt *xadj,*adjncy,*oldranks; 2347 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2348 PetscInt i,j,n_subdomains,local_size,threshold=0; 2349 PetscErrorCode ierr; 2350 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE,use_threshold=PETSC_FALSE; 2351 PetscSubcomm subcomm; 2352 2353 PetscFunctionBegin; 2354 ierr = PetscOptionsGetBool(NULL,"-matisc_use_square",&use_square,NULL);CHKERRQ(ierr); 2355 ierr = PetscOptionsGetBool(NULL,"-matisc_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2356 ierr = PetscOptionsGetInt(NULL,"-matisc_threshold",&threshold,&use_threshold);CHKERRQ(ierr); 2357 2358 /* Get info on mapping */ 2359 matis = (Mat_IS*)(mat->data); 2360 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2361 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2362 2363 /* build local CSR graph of subdomains' connectivity */ 2364 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2365 xadj[0] = 0; 2366 xadj[1] = PetscMax(n_neighs-1,0); 2367 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2368 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2369 2370 if (use_threshold) { 2371 PetscInt* count; 2372 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2373 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2374 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2375 for (j=0;j<n_shared[i];j++) { 2376 count[shared[i][j]] += 1; 2377 } 2378 } 2379 xadj[1] = 0; 2380 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2381 for (j=0;j<n_shared[i];j++) { 2382 if (count[shared[i][j]] < threshold) { 2383 adjncy[xadj[1]] = neighs[i]; 2384 adjncy_wgt[xadj[1]] = n_shared[i]; 2385 xadj[1]++; 2386 break; 2387 } 2388 } 2389 } 2390 ierr = PetscFree(count);CHKERRQ(ierr); 2391 } else { 2392 if (xadj[1]) { 2393 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2394 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2395 } 2396 } 2397 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2398 if (use_square) { 2399 for (i=0;i<xadj[1];i++) { 2400 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2401 } 2402 } 2403 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2404 2405 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2406 2407 /* 2408 Restrict work on active processes only. 2409 */ 2410 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2411 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2412 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2413 ierr = PetscMPIIntCast(!n_neighs,&color);CHKERRQ(ierr); 2414 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2415 if (color) { 2416 ierr = PetscFree(xadj);CHKERRQ(ierr); 2417 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2418 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2419 } else { 2420 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2421 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2422 prank = rank; 2423 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2424 for (i=0;i<size;i++) { 2425 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2426 } 2427 for (i=0;i<xadj[1];i++) { 2428 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2429 } 2430 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2431 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2432 n_subdomains = (PetscInt)size; 2433 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2434 2435 /* Partition */ 2436 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2437 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2438 if (use_vwgt) { 2439 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2440 v_wgt[0] = local_size; 2441 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2442 } 2443 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2444 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2445 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2446 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2447 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2448 ierr = ISView(new_ranks,0);CHKERRQ(ierr); 2449 2450 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2451 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2452 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2453 /* clean up */ 2454 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2455 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2456 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2457 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2458 } 2459 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2460 2461 /* assemble parallel IS for sends */ 2462 i = 1; 2463 if (color) i=0; 2464 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2465 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2466 2467 /* get back IS */ 2468 *is_sends = ranks_send_to; 2469 PetscFunctionReturn(0); 2470 } 2471 2472 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatISSubassemble" 2476 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2477 { 2478 Mat local_mat,new_mat; 2479 Mat_IS *matis; 2480 IS is_sends_internal; 2481 PetscInt rows,cols; 2482 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2483 PetscBool ismatis,isdense; 2484 ISLocalToGlobalMapping l2gmap; 2485 PetscInt* l2gmap_indices; 2486 const PetscInt* is_indices; 2487 MatType new_local_type; 2488 MatTypePrivate new_local_type_private; 2489 /* buffers */ 2490 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2491 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2492 /* MPI */ 2493 MPI_Comm comm; 2494 PetscMPIInt n_sends,n_recvs,commsize; 2495 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2496 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2497 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2498 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 /* checks */ 2503 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2504 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2505 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2506 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2507 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2508 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2509 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2510 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2511 PetscValidLogicalCollectiveInt(mat,bs,0); 2512 /* prepare IS for sending if not provided */ 2513 if (!is_sends) { 2514 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2515 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2516 } else { 2517 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2518 is_sends_internal = is_sends; 2519 } 2520 2521 /* get pointer of MATIS data */ 2522 matis = (Mat_IS*)mat->data; 2523 2524 /* get comm */ 2525 comm = PetscObjectComm((PetscObject)mat); 2526 2527 /* compute number of sends */ 2528 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2529 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2530 2531 /* compute number of receives */ 2532 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2533 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2534 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2535 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2536 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2537 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2538 ierr = PetscFree(iflags);CHKERRQ(ierr); 2539 2540 /* prepare send/receive buffers */ 2541 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2542 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2543 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2544 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2545 2546 /* Get data from local mat */ 2547 if (!isdense) { 2548 /* TODO: See below some guidelines on how to prepare the local buffers */ 2549 /* 2550 send_buffer_vals should contain the raw values of the local matrix 2551 send_buffer_idxs should contain: 2552 - MatType_PRIVATE type 2553 - PetscInt size_of_l2gmap 2554 - PetscInt global_row_indices[size_of_l2gmap] 2555 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2556 */ 2557 } else { 2558 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2559 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2560 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2561 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2562 send_buffer_idxs[1] = i; 2563 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2564 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2565 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2566 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2567 for (i=0;i<n_sends;i++) { 2568 ilengths_vals[is_indices[i]] = len*len; 2569 ilengths_idxs[is_indices[i]] = len+2; 2570 } 2571 } 2572 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2573 buf_size_idxs = 0; 2574 buf_size_vals = 0; 2575 for (i=0;i<n_recvs;i++) { 2576 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2577 buf_size_vals += (PetscInt)olengths_vals[i]; 2578 } 2579 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2580 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2581 2582 /* get new tags for clean communications */ 2583 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2584 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2585 2586 /* allocate for requests */ 2587 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2588 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2589 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2590 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2591 2592 /* communications */ 2593 ptr_idxs = recv_buffer_idxs; 2594 ptr_vals = recv_buffer_vals; 2595 for (i=0;i<n_recvs;i++) { 2596 source_dest = onodes[i]; 2597 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2598 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2599 ptr_idxs += olengths_idxs[i]; 2600 ptr_vals += olengths_vals[i]; 2601 } 2602 for (i=0;i<n_sends;i++) { 2603 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2604 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2605 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2606 } 2607 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2608 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2609 2610 /* assemble new l2g map */ 2611 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2612 ptr_idxs = recv_buffer_idxs; 2613 buf_size_idxs = 0; 2614 for (i=0;i<n_recvs;i++) { 2615 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2616 ptr_idxs += olengths_idxs[i]; 2617 } 2618 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2619 ptr_idxs = recv_buffer_idxs; 2620 buf_size_idxs = 0; 2621 for (i=0;i<n_recvs;i++) { 2622 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2623 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2624 ptr_idxs += olengths_idxs[i]; 2625 } 2626 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2627 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2628 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2629 2630 /* infer new local matrix type from received local matrices type */ 2631 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2632 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2633 new_local_type_private = MATAIJ_PRIVATE; 2634 new_local_type = MATSEQAIJ; 2635 if (n_recvs) { 2636 new_local_type_private = send_buffer_idxs[0]; 2637 ptr_idxs = recv_buffer_idxs; 2638 for (i=0;i<n_recvs;i++) { 2639 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2640 new_local_type_private = MATAIJ_PRIVATE; 2641 break; 2642 } 2643 ptr_idxs += olengths_idxs[i]; 2644 } 2645 switch (new_local_type_private) { 2646 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2647 new_local_type = MATSEQAIJ; 2648 bs = 1; 2649 break; 2650 case MATAIJ_PRIVATE: 2651 new_local_type = MATSEQAIJ; 2652 bs = 1; 2653 break; 2654 case MATBAIJ_PRIVATE: 2655 new_local_type = MATSEQBAIJ; 2656 break; 2657 case MATSBAIJ_PRIVATE: 2658 new_local_type = MATSEQSBAIJ; 2659 break; 2660 default: 2661 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2662 break; 2663 } 2664 } 2665 2666 /* create MATIS object */ 2667 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2668 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2670 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2671 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2672 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2673 2674 /* set values */ 2675 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2676 ptr_vals = recv_buffer_vals; 2677 ptr_idxs = recv_buffer_idxs; 2678 for (i=0;i<n_recvs;i++) { 2679 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2680 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2681 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2682 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2685 } 2686 ptr_idxs += olengths_idxs[i]; 2687 ptr_vals += olengths_vals[i]; 2688 } 2689 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2692 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2693 2694 { /* check */ 2695 Vec lvec,rvec; 2696 PetscReal infty_error; 2697 2698 //ierr = MatView(mat,0);CHKERRQ(ierr); 2699 //ierr = MatView(new_mat,0);CHKERRQ(ierr); 2700 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2701 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2702 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2703 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2704 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2705 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2706 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2707 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2708 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2709 } 2710 2711 /* free workspace */ 2712 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2713 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2714 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2715 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2716 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2717 if (isdense) { 2718 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2719 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2720 } else { 2721 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2722 } 2723 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2724 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2725 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2726 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2727 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2728 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2729 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2730 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2731 ierr = PetscFree(onodes);CHKERRQ(ierr); 2732 /* get back new mat */ 2733 *mat_n = new_mat; 2734 PetscFunctionReturn(0); 2735 } 2736 2737 /* BDDC requires metis 5.0.1 for multilevel */ 2738 #if defined(PETSC_HAVE_METIS) 2739 #include "metis.h" 2740 #define MetisInt idx_t 2741 #define MetisScalar real_t 2742 #endif 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2746 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2747 { 2748 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2749 PC_IS *pcis = (PC_IS*)pc->data; 2750 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2751 ISLocalToGlobalMapping coarse_islg; 2752 IS coarse_is; 2753 2754 PetscInt coarse_size; 2755 2756 MatNullSpace CoarseNullSpace; 2757 2758 PetscErrorCode ierr; 2759 PCType coarse_pc_type; 2760 KSPType coarse_ksp_type; 2761 PC pc_temp; 2762 2763 PetscInt im_active,active_procs; 2764 PetscBool setsym,issym=PETSC_FALSE; 2765 PetscInt *local_primal_indices=0; 2766 2767 2768 PetscFunctionBegin; 2769 2770 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2771 2772 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 2773 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2774 } 2775 2776 /* Assign global numbering to coarse dofs */ 2777 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2778 2779 im_active = 0; 2780 if (pcis->n) im_active = 1; 2781 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2782 2783 /* adapt coarse problem type */ 2784 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2785 if (pcbddc->current_level < pcbddc->max_levels) { 2786 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2787 if (pcbddc->dbg_flag) { 2788 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active %d,ratio %d). Using direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2790 } 2791 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2792 } 2793 } else { 2794 if (pcbddc->dbg_flag) { 2795 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2797 } 2798 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2799 } 2800 } 2801 2802 /* creates MATIS object for coarse matrix */ 2803 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2804 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2806 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2807 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2808 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2809 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 2811 /* TEST */ 2812 2813 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2814 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2815 /* just for now */ 2816 coarse_pc_type = PCREDUNDANT; 2817 coarse_ksp_type = KSPPREONLY; 2818 } else { 2819 #if 1 2820 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2821 /* just for now */ 2822 coarse_pc_type = PCBDDC; 2823 coarse_ksp_type = KSPRICHARDSON; 2824 } 2825 #else 2826 Mat coarse_mat_is_subassembled; 2827 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat_is_subassembled);CHKERRQ(ierr); 2828 2829 MPI_Comm prec_comm; 2830 MPI_Comm coarse_comm; 2831 PetscScalar *temp_coarse_mat_vals; 2832 PetscScalar *ins_coarse_mat_vals; 2833 PetscInt *ins_local_primal_indices; 2834 PetscMPIInt *localsizes2,*localdispl2; 2835 PetscMPIInt size_prec_comm; 2836 PetscMPIInt rank_prec_comm; 2837 PetscMPIInt master_proc=0; 2838 PetscInt ins_local_primal_size; 2839 PetscMPIInt *ranks_recv=0; 2840 PetscMPIInt count_recv=0; 2841 PetscMPIInt rank_coarse_proc_send_to=-1; 2842 PetscMPIInt coarse_color = MPI_UNDEFINED; 2843 ISLocalToGlobalMapping coarse_ISLG; 2844 PetscInt i,j,k; 2845 PetscInt offset,offset2; 2846 PetscInt *dnz; 2847 PetscInt *replicated_local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0; 2848 PetscInt replicated_primal_size=0; 2849 ins_local_primal_indices = 0; 2850 ins_coarse_mat_vals = 0; 2851 localsizes2 = 0; 2852 localdispl2 = 0; 2853 temp_coarse_mat_vals = 0; 2854 coarse_ISLG = 0; 2855 IS coarse_IS; 2856 Mat matis_coarse_local_mat; 2857 2858 /* OLD version from here */ 2859 2860 /* Construct needed data structures for message passing */ 2861 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 2862 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2863 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2864 j = size_prec_comm; 2865 ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr); 2866 ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr); 2867 /* Gather local_primal_size information for all processes */ 2868 ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2869 replicated_primal_size = 0; 2870 for (i=0; i<j; i++) { 2871 local_primal_displacements[i] = replicated_primal_size ; 2872 replicated_primal_size += local_primal_sizes[i]; 2873 } 2874 2875 #if defined(PETSC_HAVE_METIS) 2876 /* we need additional variables */ 2877 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2878 MetisInt *metis_coarse_subdivision; 2879 MetisInt options[METIS_NOPTIONS]; 2880 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2881 PetscMPIInt procs_jumps_coarse_comm; 2882 PetscMPIInt *coarse_subdivision; 2883 PetscMPIInt *total_count_recv; 2884 PetscMPIInt *total_ranks_recv; 2885 PetscMPIInt *displacements_recv; 2886 PetscMPIInt *my_faces_connectivity; 2887 PetscMPIInt *petsc_faces_adjncy; 2888 MetisInt *faces_adjncy; 2889 MetisInt *faces_xadj; 2890 PetscMPIInt *number_of_faces; 2891 PetscMPIInt *faces_displacements; 2892 PetscInt *array_int; 2893 PetscMPIInt my_faces=0; 2894 PetscMPIInt total_faces=0; 2895 PetscInt ranks_stretching_ratio; 2896 2897 /* define some quantities */ 2898 coarse_pc_type = PCBDDC; 2899 coarse_ksp_type = KSPRICHARDSON; 2900 2901 /* details of coarse decomposition */ 2902 n_subdomains = active_procs; 2903 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2904 ranks_stretching_ratio = size_prec_comm/active_procs; 2905 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2906 2907 /* build CSR graph of subdomains' connectivity */ 2908 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2909 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2910 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2911 for (j=0;j<pcis->n_shared[i];j++){ 2912 array_int[ pcis->shared[i][j] ]+=1; 2913 } 2914 } 2915 for (i=1;i<pcis->n_neigh;i++){ 2916 for (j=0;j<pcis->n_shared[i];j++){ 2917 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2918 my_faces++; 2919 break; 2920 } 2921 } 2922 } 2923 2924 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2925 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2926 my_faces=0; 2927 for (i=1;i<pcis->n_neigh;i++){ 2928 for (j=0;j<pcis->n_shared[i];j++){ 2929 if (array_int[ pcis->shared[i][j] ] > 0 ){ 2930 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2931 my_faces++; 2932 break; 2933 } 2934 } 2935 } 2936 if (rank_prec_comm == master_proc) { 2937 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2938 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2939 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2940 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2941 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2942 } 2943 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2944 if (rank_prec_comm == master_proc) { 2945 faces_xadj[0]=0; 2946 faces_displacements[0]=0; 2947 j=0; 2948 for (i=1;i<size_prec_comm+1;i++) { 2949 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2950 if (number_of_faces[i-1]) { 2951 j++; 2952 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2953 } 2954 } 2955 } 2956 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2957 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2958 ierr = PetscFree(array_int);CHKERRQ(ierr); 2959 if (rank_prec_comm == master_proc) { 2960 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2961 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2962 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2963 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2964 } 2965 2966 if ( rank_prec_comm == master_proc ) { 2967 2968 PetscInt heuristic_for_metis=3; 2969 2970 ncon=1; 2971 faces_nvtxs=n_subdomains; 2972 /* partition graoh induced by face connectivity */ 2973 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2974 ierr = METIS_SetDefaultOptions(options); 2975 /* we need a contiguous partition of the coarse mesh */ 2976 options[METIS_OPTION_CONTIG]=1; 2977 options[METIS_OPTION_NITER]=30; 2978 if (pcbddc->coarsening_ratio > 1) { 2979 if (n_subdomains>n_parts*heuristic_for_metis) { 2980 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2981 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2982 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2983 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2984 } else { 2985 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2986 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr); 2987 } 2988 } else { 2989 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 2990 } 2991 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2992 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2993 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 2994 2995 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2996 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2997 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2998 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2999 } 3000 3001 /* Create new communicator for coarse problem splitting the old one */ 3002 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3003 coarse_color=0; /* for communicator splitting */ 3004 } 3005 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3006 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3007 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3008 3009 if ( coarse_color == 0 ) { 3010 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3011 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3012 } else { 3013 rank_coarse_comm = MPI_PROC_NULL; 3014 } 3015 3016 /* master proc take care of arranging and distributing coarse information */ 3017 if (rank_coarse_comm == master_proc) { 3018 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 3019 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 3020 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 3021 /* some initializations */ 3022 displacements_recv[0]=0; 3023 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3024 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 3025 for (j=0;j<size_coarse_comm;j++) { 3026 for (i=0;i<size_prec_comm;i++) { 3027 if (coarse_subdivision[i]==j) total_count_recv[j]++; 3028 } 3029 } 3030 /* displacements needed for scatterv of total_ranks_recv */ 3031 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 3032 3033 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 3034 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 3035 for (j=0;j<size_coarse_comm;j++) { 3036 for (i=0;i<size_prec_comm;i++) { 3037 if (coarse_subdivision[i]==j) { 3038 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 3039 total_count_recv[j]+=1; 3040 } 3041 } 3042 } 3043 /*for (j=0;j<size_coarse_comm;j++) { 3044 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 3045 for (i=0;i<total_count_recv[j];i++) { 3046 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 3047 } 3048 printf("\n"); 3049 }*/ 3050 3051 /* identify new decomposition in terms of ranks in the old communicator */ 3052 for (i=0;i<n_subdomains;i++) { 3053 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 3054 } 3055 /*printf("coarse_subdivision in old end new ranks\n"); 3056 for (i=0;i<size_prec_comm;i++) 3057 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 3058 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 3059 } else { 3060 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 3061 } 3062 printf("\n");*/ 3063 } 3064 3065 /* Scatter new decomposition for send details */ 3066 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3067 /* Scatter receiving details to members of coarse decomposition */ 3068 if ( coarse_color == 0) { 3069 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3070 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 3071 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 3072 } 3073 3074 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 3075 if (coarse_color == 0) { 3076 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 3077 for (i=0;i<count_recv;i++) 3078 printf("%d ",ranks_recv[i]); 3079 printf("\n"); 3080 }*/ 3081 3082 if (rank_prec_comm == master_proc) { 3083 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 3084 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 3085 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 3086 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 3087 } 3088 #endif 3089 3090 3091 if(pcbddc->coarsening_ratio == 1) { 3092 ins_local_primal_size = pcbddc->local_primal_size; 3093 ins_local_primal_indices = local_primal_indices; 3094 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3095 /* nonzeros */ 3096 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 3097 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 3098 for (i=0;i<ins_local_primal_size;i++) { 3099 dnz[i] = ins_local_primal_size; 3100 } 3101 } else { 3102 PetscMPIInt send_size; 3103 PetscMPIInt *send_buffer; 3104 PetscInt *aux_ins_indices; 3105 PetscInt ii,jj; 3106 MPI_Request *requests; 3107 3108 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 3109 /* reusing local_primal_displacements and replicated_primal_size */ 3110 ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr); 3111 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr); 3112 replicated_primal_size = count_recv; 3113 j = 0; 3114 for (i=0;i<count_recv;i++) { 3115 local_primal_displacements[i] = j; 3116 j += local_primal_sizes[ranks_recv[i]]; 3117 } 3118 local_primal_displacements[count_recv] = j; 3119 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr); 3120 /* allocate auxiliary space */ 3121 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 3122 ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 3123 ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 3124 /* allocate stuffs for message massing */ 3125 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 3126 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 3127 /* send indices to be inserted */ 3128 for (i=0;i<count_recv;i++) { 3129 send_size = local_primal_sizes[ranks_recv[i]]; 3130 ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 3131 } 3132 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3133 send_size = pcbddc->local_primal_size; 3134 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 3135 for (i=0;i<send_size;i++) { 3136 send_buffer[i]=(PetscMPIInt)local_primal_indices[i]; 3137 } 3138 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3139 } 3140 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3141 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3142 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3143 } 3144 j = 0; 3145 for (i=0;i<count_recv;i++) { 3146 ii = local_primal_displacements[i+1]-local_primal_displacements[i]; 3147 localsizes2[i] = ii*ii; 3148 localdispl2[i] = j; 3149 j += localsizes2[i]; 3150 jj = local_primal_displacements[i]; 3151 /* it counts the coarse subdomains sharing the coarse node */ 3152 for (k=0;k<ii;k++) { 3153 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1; 3154 } 3155 } 3156 /* temp_coarse_mat_vals used to store matrix values to be received */ 3157 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 3158 /* evaluate how many values I will insert in coarse mat */ 3159 ins_local_primal_size = 0; 3160 for (i=0;i<coarse_size;i++) { 3161 if (aux_ins_indices[i]) { 3162 ins_local_primal_size++; 3163 } 3164 } 3165 /* evaluate indices I will insert in coarse mat */ 3166 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3167 j = 0; 3168 for(i=0;i<coarse_size;i++) { 3169 if(aux_ins_indices[i]) { 3170 ins_local_primal_indices[j] = i; 3171 j++; 3172 } 3173 } 3174 /* processes partecipating in coarse problem receive matrix data from their friends */ 3175 for (i=0;i<count_recv;i++) { 3176 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 3177 } 3178 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 3179 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 3180 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 3181 } 3182 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3183 /* nonzeros */ 3184 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 3185 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 3186 /* use aux_ins_indices to realize a global to local mapping */ 3187 j=0; 3188 for(i=0;i<coarse_size;i++){ 3189 if(aux_ins_indices[i]==0){ 3190 aux_ins_indices[i]=-1; 3191 } else { 3192 aux_ins_indices[i]=j; 3193 j++; 3194 } 3195 } 3196 for (i=0;i<count_recv;i++) { 3197 j = local_primal_sizes[ranks_recv[i]]; 3198 for (k=0;k<j;k++) { 3199 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j; 3200 } 3201 } 3202 /* check */ 3203 for (i=0;i<ins_local_primal_size;i++) { 3204 if (dnz[i] > ins_local_primal_size) { 3205 dnz[i] = ins_local_primal_size; 3206 } 3207 } 3208 ierr = PetscFree(requests);CHKERRQ(ierr); 3209 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 3210 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 3211 } 3212 /* create local to global mapping needed by coarse MATIS */ 3213 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 3214 coarse_comm = prec_comm; 3215 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 3216 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 3217 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 3218 /* Now create and fill up coarse matrix */ 3219 3220 3221 3222 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr); 3223 ierr = MatSetUp(coarse_mat);CHKERRQ(ierr); 3224 ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 3225 ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr); 3226 ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr); 3227 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 3228 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 3229 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3230 /* preallocation */ 3231 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 3232 ierr = PetscFree(dnz);CHKERRQ(ierr); 3233 /* insert values */ 3234 if (pcbddc->coarsening_ratio == 1) { 3235 ins_coarse_mat_vals = coarse_submat_vals; 3236 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 3237 } else { 3238 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3239 for (k=0;k<replicated_primal_size;k++) { 3240 offset = local_primal_displacements[k]; 3241 offset2 = localdispl2[k]; 3242 ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k]; 3243 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 3244 for (j=0;j<ins_local_primal_size;j++){ 3245 ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j]; 3246 } 3247 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 3248 ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 3249 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 3250 } 3251 } 3252 ins_local_primal_indices = 0; 3253 ins_coarse_mat_vals = 0; 3254 ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 { /* check */ 3257 Vec _lvec,_rvec; 3258 PetscReal _infty_error; 3259 3260 //ierr = MatView(mat,0);CHKERRQ(ierr); 3261 //ierr = MatView(new_mat,0);CHKERRQ(ierr); 3262 ierr = MatGetVecs(coarse_mat,&_rvec,&_lvec);CHKERRQ(ierr); 3263 ierr = VecSetRandom(_rvec,NULL);CHKERRQ(ierr); 3264 ierr = MatMult(coarse_mat,_rvec,_lvec);CHKERRQ(ierr); 3265 ierr = VecScale(_lvec,-1.0);CHKERRQ(ierr); 3266 ierr = MatMultAdd(coarse_mat_is_subassembled,_rvec,_lvec,_lvec);CHKERRQ(ierr); 3267 ierr = VecNorm(_lvec,NORM_INFINITY,&_infty_error);CHKERRQ(ierr); 3268 ierr = PetscPrintf(PetscObjectComm((PetscObject)coarse_mat),"Infinity error subassembling against old code %1.6e\n",_infty_error); 3269 ierr = VecDestroy(&_rvec);CHKERRQ(ierr); 3270 ierr = VecDestroy(&_lvec);CHKERRQ(ierr); 3271 } 3272 ierr = MatDestroy(&coarse_mat_is_subassembled);CHKERRQ(ierr); 3273 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 3274 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 3275 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 3276 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 3277 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 3278 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 3279 ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr); 3280 ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr); 3281 ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr); 3282 } 3283 #endif 3284 3285 /* propagate symmetry info */ 3286 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3287 3288 /* create local to global scatters */ 3289 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3290 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3291 3292 /* free memory no longer needed */ 3293 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3294 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3295 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3296 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3297 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3298 3299 /* Compute coarse null space */ 3300 CoarseNullSpace = 0; 3301 if (pcbddc->NullSpace) { 3302 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3303 } 3304 3305 /* KSP for coarse problem */ 3306 { 3307 PetscBool isbddc=PETSC_FALSE; 3308 PetscInt max_it; 3309 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3310 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3311 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3312 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3313 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3314 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3315 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3316 /* Allow user's customization */ 3317 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 3318 /* Set Up PC for coarse problem BDDC */ 3319 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3320 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3321 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3322 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3323 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 3324 if (CoarseNullSpace) { 3325 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3326 } 3327 if (pcbddc->dbg_flag) { 3328 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"----------------Level %d -> Setting up next level---------------\n",pcbddc->current_level);CHKERRQ(ierr); 3329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3330 } 3331 } else { 3332 if (CoarseNullSpace) { 3333 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3334 } 3335 } 3336 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3337 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3338 3339 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3340 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3341 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3342 if (max_it == 1) { 3343 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3344 if (isbddc) { 3345 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 3346 } 3347 } 3348 } 3349 /* Check coarse problem if requested */ 3350 if (pcbddc->dbg_flag) { 3351 KSP check_ksp; 3352 PC check_pc; 3353 Vec check_vec; 3354 PetscInt its; 3355 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3356 KSPType check_ksp_type; 3357 3358 /* Create ksp object suitable for extreme eigenvalues' estimation */ 3359 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr); 3360 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3361 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3362 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3363 if (issym) check_ksp_type = KSPCG; 3364 else check_ksp_type = KSPGMRES; 3365 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 3366 } else { 3367 check_ksp_type = KSPPREONLY; 3368 } 3369 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3370 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3371 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3372 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3373 /* create random vec */ 3374 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3375 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3376 if (CoarseNullSpace) { 3377 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3378 } 3379 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3380 /* solve coarse problem */ 3381 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3382 if (CoarseNullSpace) { 3383 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3384 } 3385 /* check coarse problem residual error */ 3386 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3387 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3388 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3389 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3390 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3391 /* get eigenvalue estimation if inexact */ 3392 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3393 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3394 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",its,check_ksp_type);CHKERRQ(ierr); 3396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 3397 } 3398 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 3399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 3400 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3401 } 3402 if (pcbddc->dbg_flag) { 3403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3404 } 3405 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3406 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3412 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3413 { 3414 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3415 PC_IS* pcis = (PC_IS*)pc->data; 3416 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3417 PetscInt i,j,coarse_size; 3418 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3419 PetscErrorCode ierr; 3420 3421 PetscFunctionBegin; 3422 /* get indices in local ordering for vertices and constraints */ 3423 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3424 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3425 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3426 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3427 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3428 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3429 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3430 3431 /* Compute global number of coarse dofs */ 3432 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3433 3434 /* check numbering */ 3435 if (pcbddc->dbg_flag) { 3436 PetscScalar coarsesum,*array; 3437 3438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3441 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3442 for (i=0;i<pcbddc->local_primal_size;i++) { 3443 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3444 } 3445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3447 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3448 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3449 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3450 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3451 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3452 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3453 for (i=0;i<pcis->n;i++) { 3454 if (array[i] == 1.0) { 3455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3456 } 3457 } 3458 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3459 for (i=0;i<pcis->n;i++) { 3460 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3461 } 3462 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3463 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3464 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3465 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3466 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3467 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3468 if (pcbddc->dbg_flag > 1) { 3469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3471 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3472 for (i=0;i<pcbddc->local_primal_size;i++) { 3473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3474 } 3475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3476 } 3477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3478 } 3479 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3480 /* get back data */ 3481 *coarse_size_n = coarse_size; 3482 *local_primal_indices_n = local_primal_indices; 3483 PetscFunctionReturn(0); 3484 } 3485 3486