1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 126 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 127 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 128 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 129 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 130 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 131 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 132 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 133 if (n_constraints) { 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 144 { 145 PetscErrorCode ierr; 146 /* pointers to pcis and pcbddc */ 147 PC_IS* pcis = (PC_IS*)pc->data; 148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 149 /* submatrices of local problem */ 150 Mat A_RV,A_VR,A_VV; 151 /* working matrices */ 152 Mat M1,M2,M3,C_CR; 153 /* working vectors */ 154 Vec vec1_C,vec2_C,vec1_V,vec2_V; 155 /* additional working stuff */ 156 IS is_aux; 157 ISLocalToGlobalMapping BtoNmap; 158 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 159 const PetscScalar *array,*row_cmat_values; 160 const PetscInt *row_cmat_indices,*idx_R_local; 161 PetscInt *vertices,*idx_V_B,*auxindices; 162 PetscInt n_vertices,n_constraints,size_of_constraint; 163 PetscInt i,j,n_R,n_D,n_B; 164 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 165 /* Vector and matrix types */ 166 VecType impVecType; 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 184 185 /* Allocating some extra storage just to be safe */ 186 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 187 for (i=0;i<pcis->n;i++) auxindices[i]=i; 188 189 /* vertices in boundary numbering */ 190 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 191 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 192 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 194 if (i != n_vertices) { 195 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 196 } 197 198 /* some work vectors on vertices and/or constraints */ 199 if (n_vertices) { 200 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 201 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 202 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 203 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 204 } 205 if (n_constraints) { 206 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 208 } 209 210 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 211 if (n_constraints) { 212 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 213 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 214 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 215 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 216 217 /* Extract constraints on R nodes: C_{CR} */ 218 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 220 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 221 222 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 223 for (i=0;i<n_constraints;i++) { 224 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 225 /* Get row of constraint matrix in R numbering */ 226 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 227 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 229 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 230 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 231 /* Solve for row of constraint matrix in R numbering */ 232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 233 /* Set values in local_auxmat2 */ 234 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 235 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 237 } 238 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 241 242 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 243 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 244 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 245 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 246 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 247 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(M1);CHKERRQ(ierr); 249 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 250 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 251 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 252 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 254 ierr = MatDestroy(&M2);CHKERRQ(ierr); 255 ierr = MatDestroy(&M3);CHKERRQ(ierr); 256 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 257 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 258 } 259 260 /* Get submatrices from subdomain matrix */ 261 if (n_vertices) { 262 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 266 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 267 } 268 269 /* Matrix of coarse basis functions (local) */ 270 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 271 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 272 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 273 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 274 if (pcbddc->switch_static || pcbddc->dbg_flag) { 275 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 276 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 277 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 279 } 280 281 if (pcbddc->dbg_flag) { 282 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 283 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 284 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 285 } 286 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 287 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 288 289 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 290 291 /* vertices */ 292 for (i=0;i<n_vertices;i++) { 293 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 294 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 295 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 296 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 297 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 298 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 299 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 300 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 301 if (n_constraints) { 302 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 303 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 304 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 305 } 306 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 307 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 308 309 /* Set values in coarse basis function and subdomain part of coarse_mat */ 310 /* coarse basis functions */ 311 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 312 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 313 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 314 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 315 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 317 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 318 if (pcbddc->switch_static || pcbddc->dbg_flag) { 319 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 320 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 322 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 323 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 324 } 325 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 326 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 327 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 328 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 329 if (n_constraints) { 330 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 331 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 332 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 333 } 334 335 /* check */ 336 if (pcbddc->dbg_flag) { 337 /* assemble subdomain vector on local nodes */ 338 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 340 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 342 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 343 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 344 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 345 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 346 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 356 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 357 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 358 /* check saddle point solution */ 359 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 360 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 361 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 362 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 363 /* shift by the identity matrix */ 364 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 365 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 366 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 367 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 368 } 369 } 370 371 /* constraints */ 372 for (i=0;i<n_constraints;i++) { 373 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 374 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 376 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 377 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 378 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 379 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 381 if (n_vertices) { 382 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 383 } 384 /* Set values in coarse basis function and subdomain part of coarse_mat */ 385 /* coarse basis functions */ 386 j = i+n_vertices; /* don't touch this! */ 387 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 388 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 389 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 390 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 391 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 392 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 393 if (pcbddc->switch_static || pcbddc->dbg_flag) { 394 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 399 } 400 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 401 if (n_vertices) { 402 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 403 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 405 } 406 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 407 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 409 410 if (pcbddc->dbg_flag) { 411 /* assemble subdomain vector on nodes */ 412 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 414 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* assemble subdomain vector of lagrange multipliers */ 419 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 420 if (n_vertices) { 421 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 } 425 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 430 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 431 /* check saddle point solution */ 432 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 433 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 434 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 435 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 436 /* shift by the identity matrix */ 437 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 441 } 442 } 443 /* call assembling routines for local coarse basis */ 444 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 445 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 447 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 448 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 449 } 450 451 /* compute other basis functions for non-symmetric problems */ 452 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 453 if (!setsym || (setsym && !issym)) { 454 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 455 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 456 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 457 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 458 if (pcbddc->switch_static || pcbddc->dbg_flag) { 459 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 460 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 461 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 462 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 463 } 464 for (i=0;i<pcbddc->local_primal_size;i++) { 465 if (n_constraints) { 466 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 467 for (j=0;j<n_constraints;j++) { 468 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 469 } 470 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 471 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 472 } 473 if (i<n_vertices) { 474 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 475 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 477 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 478 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 479 if (n_constraints) { 480 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 481 } 482 } else { 483 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 484 } 485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 487 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 490 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 492 if (i<n_vertices) { 493 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 494 } 495 if (pcbddc->switch_static || pcbddc->dbg_flag) { 496 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 498 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 499 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 500 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 501 } 502 503 if (pcbddc->dbg_flag) { 504 /* assemble subdomain vector on nodes */ 505 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 509 if (i<n_vertices) { 510 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 511 } 512 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 513 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 514 /* assemble subdomain vector of lagrange multipliers */ 515 for (j=0;j<pcbddc->local_primal_size;j++) { 516 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 517 } 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 /* check saddle point solution */ 521 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 522 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 524 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 525 /* shift by the identity matrix */ 526 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 527 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 530 } 531 } 532 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 535 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 } 538 } 539 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 540 /* Checking coarse_sub_mat and coarse basis functios */ 541 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 542 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 543 if (pcbddc->dbg_flag) { 544 Mat coarse_sub_mat; 545 Mat AUXMAT,TM1,TM2,TM3,TM4; 546 Mat coarse_phi_D,coarse_phi_B; 547 Mat coarse_psi_D,coarse_psi_B; 548 Mat A_II,A_BB,A_IB,A_BI; 549 MatType checkmattype=MATSEQAIJ; 550 PetscReal real_value; 551 552 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 553 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 554 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 555 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 556 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 557 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 558 if (pcbddc->coarse_psi_B) { 559 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 560 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 561 } 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 563 564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 567 if (pcbddc->coarse_psi_B) { 568 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 569 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 570 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 571 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 572 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 573 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 574 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 575 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 576 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 577 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 578 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 579 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 580 } else { 581 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 582 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 583 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 584 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 585 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 586 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 } 590 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 591 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 592 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 593 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 594 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 595 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 600 for (i=0;i<pcbddc->local_primal_size;i++) { 601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 602 } 603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 604 for (i=0;i<pcbddc->local_primal_size;i++) { 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 606 } 607 if (pcbddc->coarse_psi_B) { 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 609 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 611 } 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 613 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 615 } 616 } 617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 618 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 619 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 620 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 621 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 622 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 623 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 624 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 625 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 626 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 627 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 628 if (pcbddc->coarse_psi_B) { 629 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 630 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 631 } 632 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 634 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 635 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 636 } 637 /* free memory */ 638 if (n_vertices) { 639 ierr = PetscFree(vertices);CHKERRQ(ierr); 640 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 641 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 643 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 645 } 646 if (n_constraints) { 647 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 651 } 652 ierr = PetscFree(auxindices);CHKERRQ(ierr); 653 /* get back data */ 654 *coarse_submat_vals_n = coarse_submat_vals; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 661 { 662 PC_IS* pcis = (PC_IS*)(pc->data); 663 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 664 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 665 /* manage repeated solves */ 666 MatReuse reuse; 667 MatStructure matstruct; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 /* get mat flags */ 672 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 673 reuse = MAT_INITIAL_MATRIX; 674 if (pc->setupcalled) { 675 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 676 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 677 if (matstruct == SAME_NONZERO_PATTERN) { 678 reuse = MAT_REUSE_MATRIX; 679 } else { 680 reuse = MAT_INITIAL_MATRIX; 681 } 682 } 683 if (reuse == MAT_INITIAL_MATRIX) { 684 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 689 } 690 691 /* transform local matrices if needed */ 692 if (pcbddc->use_change_of_basis) { 693 Mat change_mat_all; 694 PetscScalar *row_cmat_values; 695 PetscInt *row_cmat_indices; 696 PetscInt *nnz,*is_indices,*temp_indices; 697 PetscInt i,j,k,n_D,n_B; 698 699 /* Get Non-overlapping dimensions */ 700 n_B = pcis->n_B; 701 n_D = pcis->n-n_B; 702 703 /* compute nonzero structure of change of basis on all local nodes */ 704 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 705 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 706 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 707 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 708 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 709 k=1; 710 for (i=0;i<n_B;i++) { 711 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 712 nnz[is_indices[i]]=j; 713 if (k < j) k = j; 714 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 715 } 716 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 /* assemble change of basis matrix on the whole set of local dofs */ 718 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 719 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 720 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 721 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 722 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) { 725 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 726 } 727 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 for (i=0;i<n_B;i++) { 730 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 731 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 732 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 733 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 734 } 735 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 738 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 739 if (i==1) { 740 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 741 } else { 742 Mat work_mat; 743 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 744 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 745 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 746 } 747 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 748 ierr = PetscFree(nnz);CHKERRQ(ierr); 749 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 750 } else { 751 /* without change of basis, the local matrix is unchanged */ 752 if (!pcbddc->local_mat) { 753 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 754 pcbddc->local_mat = matis->A; 755 } 756 } 757 758 /* get submatrices */ 759 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 760 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 761 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 762 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 769 { 770 PC_IS* pcis = (PC_IS*)(pc->data); 771 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 772 IS is_aux1,is_aux2; 773 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 774 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 775 PetscBT bitmask; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 /* Set Non-overlapping dimensions */ 780 n_B = pcis->n_B; n_D = pcis->n - n_B; 781 /* get vertex indices from constraint matrix */ 782 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 783 /* Set number of constraints */ 784 n_constraints = pcbddc->local_primal_size-n_vertices; 785 /* create auxiliary bitmask */ 786 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 787 for (i=0;i<n_vertices;i++) { 788 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 789 } 790 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 791 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 792 for (i=0, n_R=0; i<pcis->n; i++) { 793 if (!PetscBTLookup(bitmask,i)) { 794 idx_R_local[n_R] = i; 795 n_R++; 796 } 797 } 798 ierr = PetscFree(vertices);CHKERRQ(ierr); 799 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 800 801 /* print some info if requested */ 802 if (pcbddc->dbg_flag) { 803 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 810 } 811 812 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 813 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 814 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 815 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 for (i=0; i<n_D; i++) { 817 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 818 } 819 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0, j=0; i<n_R; i++) { 821 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 822 aux_array1[j++] = i; 823 } 824 } 825 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 826 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 827 for (i=0, j=0; i<n_B; i++) { 828 if (!PetscBTLookup(bitmask,is_indices[i])) { 829 aux_array2[j++] = i; 830 } 831 } 832 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 834 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 835 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 836 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 840 for (i=0, j=0; i<n_R; i++) { 841 if (PetscBTLookup(bitmask,idx_R_local[i])) { 842 aux_array1[j++] = i; 843 } 844 } 845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 848 } 849 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 856 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 857 { 858 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 859 PC_IS *pcis = (PC_IS*)pc->data; 860 PC pc_temp; 861 Mat A_RR; 862 Vec vec1,vec2,vec3; 863 MatStructure matstruct; 864 PetscScalar m_one = -1.0; 865 PetscReal value; 866 PetscInt n_D,n_R; 867 PetscBool use_exact,use_exact_reduced; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* Creating PC contexts for local Dirichlet and Neumann problems */ 872 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 873 874 /* DIRICHLET PROBLEM */ 875 /* Matrix for Dirichlet problem is pcis->A_II */ 876 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 877 if (!pcbddc->ksp_D) { /* create object if not yet build */ 878 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 879 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 880 /* default */ 881 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 882 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 883 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 884 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 885 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 886 } 887 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 888 /* Allow user's customization */ 889 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 890 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 891 if (!n_D) { 892 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 893 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 894 } 895 /* Set Up KSP for Dirichlet problem of BDDC */ 896 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 897 /* set ksp_D into pcis data */ 898 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 899 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 900 pcis->ksp_D = pcbddc->ksp_D; 901 902 /* NEUMANN PROBLEM */ 903 /* Matrix for Neumann problem is A_RR -> we need to create it */ 904 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 905 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 906 if (!pcbddc->ksp_R) { /* create object if not yet build */ 907 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 908 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 909 /* default */ 910 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 911 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 912 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 913 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 914 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 915 } 916 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 917 /* Allow user's customization */ 918 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 919 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 920 if (!n_R) { 921 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 922 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 923 } 924 /* Set Up KSP for Neumann problem of BDDC */ 925 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 926 927 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 928 929 /* Dirichlet */ 930 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 931 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 932 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 933 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 934 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 935 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 936 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 937 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 938 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 939 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 940 /* need to be adapted? */ 941 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 942 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 943 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 944 /* print info */ 945 if (pcbddc->dbg_flag) { 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 } 952 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 953 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 954 } 955 956 /* Neumann */ 957 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 958 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 959 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 960 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 961 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 962 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 963 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 966 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 967 /* need to be adapted? */ 968 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 969 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 970 /* print info */ 971 if (pcbddc->dbg_flag) { 972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 973 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 974 } 975 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 976 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 977 } 978 979 /* free Neumann problem's matrix */ 980 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 986 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 987 { 988 PetscErrorCode ierr; 989 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 990 991 PetscFunctionBegin; 992 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 993 if (pcbddc->local_auxmat1) { 994 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 995 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 996 } 997 PetscFunctionReturn(0); 998 } 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1002 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1003 { 1004 PetscErrorCode ierr; 1005 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1006 PC_IS* pcis = (PC_IS*) (pc->data); 1007 const PetscScalar zero = 0.0; 1008 1009 PetscFunctionBegin; 1010 /* Application of PHI^T (or PSI^T) */ 1011 if (pcbddc->coarse_psi_B) { 1012 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1013 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1014 } else { 1015 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1016 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1017 } 1018 /* Scatter data of coarse_rhs */ 1019 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1020 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 1022 /* Local solution on R nodes */ 1023 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1025 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1026 if (pcbddc->switch_static) { 1027 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1028 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1029 } 1030 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1031 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 if (pcbddc->switch_static) { 1035 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 } 1038 1039 /* Coarse solution */ 1040 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1042 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1043 } 1044 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1046 1047 /* Sum contributions from two levels */ 1048 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1049 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1055 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1056 { 1057 PetscErrorCode ierr; 1058 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1059 1060 PetscFunctionBegin; 1061 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1067 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1068 { 1069 PetscErrorCode ierr; 1070 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1071 1072 PetscFunctionBegin; 1073 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* uncomment for testing purposes */ 1078 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1081 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1082 { 1083 PetscErrorCode ierr; 1084 PC_IS* pcis = (PC_IS*)(pc->data); 1085 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1086 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1087 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1088 MatType impMatType=MATSEQAIJ; 1089 /* one and zero */ 1090 PetscScalar one=1.0,zero=0.0; 1091 /* space to store constraints and their local indices */ 1092 PetscScalar *temp_quadrature_constraint; 1093 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1094 /* iterators */ 1095 PetscInt i,j,k,total_counts,temp_start_ptr; 1096 /* stuff to store connected components stored in pcbddc->mat_graph */ 1097 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1098 PetscInt n_ISForFaces,n_ISForEdges; 1099 /* near null space stuff */ 1100 MatNullSpace nearnullsp; 1101 const Vec *nearnullvecs; 1102 Vec *localnearnullsp; 1103 PetscBool nnsp_has_cnst; 1104 PetscInt nnsp_size; 1105 PetscScalar *array; 1106 /* BLAS integers */ 1107 PetscBLASInt lwork,lierr; 1108 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1109 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1110 /* LAPACK working arrays for SVD or POD */ 1111 PetscBool skip_lapack; 1112 PetscScalar *work; 1113 PetscReal *singular_vals; 1114 #if defined(PETSC_USE_COMPLEX) 1115 PetscReal *rwork; 1116 #endif 1117 #if defined(PETSC_MISSING_LAPACK_GESVD) 1118 PetscBLASInt Blas_one_2=1; 1119 PetscScalar *temp_basis,*correlation_mat; 1120 #else 1121 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1122 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1123 #endif 1124 /* change of basis */ 1125 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1126 PetscBool boolforchange; 1127 PetscBT touched,change_basis; 1128 /* auxiliary stuff */ 1129 PetscInt *nnz,*is_indices,*local_to_B; 1130 /* some quantities */ 1131 PetscInt n_vertices,total_primal_vertices; 1132 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1133 1134 1135 PetscFunctionBegin; 1136 /* Get index sets for faces, edges and vertices from graph */ 1137 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1138 pcbddc->use_vertices = PETSC_TRUE; 1139 } 1140 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1141 /* HACK: provide functions to set change of basis */ 1142 if (!ISForVertices && pcbddc->NullSpace) { 1143 pcbddc->use_change_of_basis = PETSC_TRUE; 1144 pcbddc->use_change_on_faces = PETSC_FALSE; 1145 } 1146 /* print some info */ 1147 if (pcbddc->dbg_flag) { 1148 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1149 i = 0; 1150 if (ISForVertices) { 1151 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1152 } 1153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1154 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1157 } 1158 /* check if near null space is attached to global mat */ 1159 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1160 if (nearnullsp) { 1161 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1162 } else { /* if near null space is not provided BDDC uses constants by default */ 1163 nnsp_size = 0; 1164 nnsp_has_cnst = PETSC_TRUE; 1165 } 1166 /* get max number of constraints on a single cc */ 1167 max_constraints = nnsp_size; 1168 if (nnsp_has_cnst) max_constraints++; 1169 1170 /* 1171 Evaluate maximum storage size needed by the procedure 1172 - temp_indices will contain start index of each constraint stored as follows 1173 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1174 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1175 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1176 */ 1177 total_counts = n_ISForFaces+n_ISForEdges; 1178 total_counts *= max_constraints; 1179 n_vertices = 0; 1180 if (ISForVertices) { 1181 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1182 } 1183 total_counts += n_vertices; 1184 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1185 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1186 total_counts = 0; 1187 max_size_of_constraint = 0; 1188 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1189 if (i<n_ISForEdges) { 1190 used_IS = &ISForEdges[i]; 1191 } else { 1192 used_IS = &ISForFaces[i-n_ISForEdges]; 1193 } 1194 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1195 total_counts += j; 1196 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1197 } 1198 total_counts *= max_constraints; 1199 total_counts += n_vertices; 1200 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1201 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1202 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1203 /* local to boundary numbering */ 1204 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1205 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1206 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1207 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1208 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1209 /* get local part of global near null space vectors */ 1210 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1211 for (k=0;k<nnsp_size;k++) { 1212 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1213 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 } 1216 1217 /* whether or not to skip lapack calls */ 1218 skip_lapack = PETSC_TRUE; 1219 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1220 1221 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1222 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1223 PetscScalar temp_work; 1224 #if defined(PETSC_MISSING_LAPACK_GESVD) 1225 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1226 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1227 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1228 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1229 #if defined(PETSC_USE_COMPLEX) 1230 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1231 #endif 1232 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1233 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1234 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1235 lwork = -1; 1236 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1237 #if !defined(PETSC_USE_COMPLEX) 1238 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1239 #else 1240 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1241 #endif 1242 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1243 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1244 #else /* on missing GESVD */ 1245 /* SVD */ 1246 PetscInt max_n,min_n; 1247 max_n = max_size_of_constraint; 1248 min_n = max_constraints; 1249 if (max_size_of_constraint < max_constraints) { 1250 min_n = max_size_of_constraint; 1251 max_n = max_constraints; 1252 } 1253 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1254 #if defined(PETSC_USE_COMPLEX) 1255 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1256 #endif 1257 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1258 lwork = -1; 1259 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1260 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1261 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1262 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1263 #if !defined(PETSC_USE_COMPLEX) 1264 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1265 #else 1266 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1267 #endif 1268 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1269 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1270 #endif /* on missing GESVD */ 1271 /* Allocate optimal workspace */ 1272 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1273 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1274 } 1275 /* Now we can loop on constraining sets */ 1276 total_counts = 0; 1277 temp_indices[0] = 0; 1278 /* vertices */ 1279 if (ISForVertices) { 1280 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1281 if (nnsp_has_cnst) { /* consider all vertices */ 1282 for (i=0;i<n_vertices;i++) { 1283 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1284 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1285 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1286 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1287 total_counts++; 1288 } 1289 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1290 PetscBool used_vertex; 1291 for (i=0;i<n_vertices;i++) { 1292 used_vertex = PETSC_FALSE; 1293 k = 0; 1294 while (!used_vertex && k<nnsp_size) { 1295 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1296 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1297 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1298 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1299 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1300 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1301 total_counts++; 1302 used_vertex = PETSC_TRUE; 1303 } 1304 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1305 k++; 1306 } 1307 } 1308 } 1309 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1310 n_vertices = total_counts; 1311 } 1312 1313 /* edges and faces */ 1314 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1315 if (i<n_ISForEdges) { 1316 used_IS = &ISForEdges[i]; 1317 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1318 } else { 1319 used_IS = &ISForFaces[i-n_ISForEdges]; 1320 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1321 } 1322 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1323 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1324 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1325 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1326 /* change of basis should not be performed on local periodic nodes */ 1327 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1328 if (nnsp_has_cnst) { 1329 PetscScalar quad_value; 1330 temp_constraints++; 1331 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1332 for (j=0;j<size_of_constraint;j++) { 1333 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1334 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1335 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1336 } 1337 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1338 if (boolforchange) { 1339 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1340 } 1341 total_counts++; 1342 } 1343 for (k=0;k<nnsp_size;k++) { 1344 PetscReal real_value; 1345 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1346 for (j=0;j<size_of_constraint;j++) { 1347 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1348 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1349 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1350 } 1351 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1352 /* check if array is null on the connected component */ 1353 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1354 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1355 if (real_value > 0.0) { /* keep indices and values */ 1356 temp_constraints++; 1357 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1358 if (boolforchange) { 1359 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1360 } 1361 total_counts++; 1362 } 1363 } 1364 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1365 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1366 if (!pcbddc->use_nnsp_true) { 1367 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1368 1369 #if defined(PETSC_MISSING_LAPACK_GESVD) 1370 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1371 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1372 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1373 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1374 from that computed using LAPACKgesvd 1375 -> This is due to a different computation of eigenvectors in LAPACKheev 1376 -> The quality of the POD-computed basis will be the same */ 1377 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1378 /* Store upper triangular part of correlation matrix */ 1379 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1380 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1381 for (j=0;j<temp_constraints;j++) { 1382 for (k=0;k<j+1;k++) { 1383 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1384 } 1385 } 1386 /* compute eigenvalues and eigenvectors of correlation matrix */ 1387 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1388 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1389 #if !defined(PETSC_USE_COMPLEX) 1390 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1391 #else 1392 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1393 #endif 1394 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1395 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1396 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1397 j=0; 1398 while (j < temp_constraints && singular_vals[j] < tol) j++; 1399 total_counts=total_counts-j; 1400 /* scale and copy POD basis into used quadrature memory */ 1401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1402 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1403 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1405 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1406 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1407 if (j<temp_constraints) { 1408 PetscInt ii; 1409 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1410 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1411 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1412 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1413 for (k=0;k<temp_constraints-j;k++) { 1414 for (ii=0;ii<size_of_constraint;ii++) { 1415 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1416 } 1417 } 1418 } 1419 #else /* on missing GESVD */ 1420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1421 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1422 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1423 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1424 #if !defined(PETSC_USE_COMPLEX) 1425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1426 #else 1427 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1428 #endif 1429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1430 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1431 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1432 k = temp_constraints; 1433 if (k > size_of_constraint) k = size_of_constraint; 1434 j = 0; 1435 while (j < k && singular_vals[k-j-1] < tol) j++; 1436 total_counts = total_counts-temp_constraints+k-j; 1437 #endif /* on missing GESVD */ 1438 } 1439 } 1440 /* free index sets of faces, edges and vertices */ 1441 for (i=0;i<n_ISForFaces;i++) { 1442 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1443 } 1444 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1445 for (i=0;i<n_ISForEdges;i++) { 1446 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1449 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1450 1451 /* free workspace */ 1452 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1453 ierr = PetscFree(work);CHKERRQ(ierr); 1454 #if defined(PETSC_USE_COMPLEX) 1455 ierr = PetscFree(rwork);CHKERRQ(ierr); 1456 #endif 1457 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1458 #if defined(PETSC_MISSING_LAPACK_GESVD) 1459 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1460 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1461 #endif 1462 } 1463 for (k=0;k<nnsp_size;k++) { 1464 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1465 } 1466 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1467 1468 /* set quantities in pcbddc data structure */ 1469 /* n_vertices defines the number of subdomain corners in the primal space */ 1470 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1471 pcbddc->local_primal_size = total_counts; 1472 pcbddc->n_vertices = n_vertices; 1473 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1474 1475 /* Create constraint matrix */ 1476 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1477 /* so we need to set it up properly either with or without change of basis */ 1478 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1479 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1480 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1481 /* array to compute a local numbering of constraints : vertices first then constraints */ 1482 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1483 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1484 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1485 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1486 /* auxiliary stuff for basis change */ 1487 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1488 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1489 1490 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1491 total_primal_vertices=0; 1492 for (i=0;i<pcbddc->local_primal_size;i++) { 1493 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1494 if (size_of_constraint == 1) { 1495 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1496 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1497 aux_primal_minloc[total_primal_vertices]=0; 1498 total_primal_vertices++; 1499 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1500 PetscInt min_loc,min_index; 1501 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1502 /* find first untouched local node */ 1503 k = 0; 1504 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1505 min_index = global_indices[k]; 1506 min_loc = k; 1507 /* search the minimum among global nodes already untouched on the cc */ 1508 for (k=1;k<size_of_constraint;k++) { 1509 /* there can be more than one constraint on a single connected component */ 1510 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1511 min_index = global_indices[k]; 1512 min_loc = k; 1513 } 1514 } 1515 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1516 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1517 aux_primal_minloc[total_primal_vertices]=min_loc; 1518 total_primal_vertices++; 1519 } 1520 } 1521 /* free workspace */ 1522 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1523 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1524 /* permute indices in order to have a sorted set of vertices */ 1525 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1526 1527 /* nonzero structure of constraint matrix */ 1528 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1529 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1530 j=total_primal_vertices; 1531 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1532 if (!PetscBTLookup(change_basis,i)) { 1533 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1534 j++; 1535 } 1536 } 1537 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1538 ierr = PetscFree(nnz);CHKERRQ(ierr); 1539 /* set values in constraint matrix */ 1540 for (i=0;i<total_primal_vertices;i++) { 1541 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1542 } 1543 total_counts = total_primal_vertices; 1544 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1545 if (!PetscBTLookup(change_basis,i)) { 1546 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1547 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1548 total_counts++; 1549 } 1550 } 1551 /* assembling */ 1552 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1556 */ 1557 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1558 if (pcbddc->use_change_of_basis) { 1559 PetscBool qr_needed = PETSC_FALSE; 1560 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1561 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1562 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1563 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1564 /* work arrays */ 1565 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1566 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1567 /* nonzeros per row */ 1568 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1569 if (PetscBTLookup(change_basis,i)) { 1570 qr_needed = PETSC_TRUE; 1571 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1572 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1573 } 1574 } 1575 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1576 ierr = PetscFree(nnz);CHKERRQ(ierr); 1577 /* Set initial identity in the matrix */ 1578 for (i=0;i<pcis->n_B;i++) { 1579 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1580 } 1581 1582 /* Now we loop on the constraints which need a change of basis */ 1583 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1584 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1585 /* Change of basis matrix T computed via QR decomposition of constraints */ 1586 if (qr_needed) { 1587 /* dual and primal dofs on a single cc */ 1588 PetscInt dual_dofs,primal_dofs; 1589 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1590 PetscInt primal_counter; 1591 /* working stuff for GEQRF */ 1592 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1593 PetscBLASInt lqr_work; 1594 /* working stuff for UNGQR */ 1595 PetscScalar *gqr_work,lgqr_work_t; 1596 PetscBLASInt lgqr_work; 1597 /* working stuff for TRTRS */ 1598 PetscScalar *trs_rhs; 1599 PetscBLASInt Blas_NRHS; 1600 /* pointers for values insertion into change of basis matrix */ 1601 PetscInt *start_rows,*start_cols; 1602 PetscScalar *start_vals; 1603 /* working stuff for values insertion */ 1604 PetscBT is_primal; 1605 1606 /* space to store Q */ 1607 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1608 /* first we issue queries for optimal work */ 1609 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1610 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1611 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1612 lqr_work = -1; 1613 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1614 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1615 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1616 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1617 lgqr_work = -1; 1618 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1619 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1620 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1621 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1622 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1623 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1624 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1625 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1626 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1627 /* array to store scaling factors for reflectors */ 1628 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1629 /* array to store rhs and solution of triangular solver */ 1630 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1631 /* array to store whether a node is primal or not */ 1632 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1633 for (i=0;i<total_primal_vertices;i++) { 1634 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1635 } 1636 1637 /* allocating workspace for check */ 1638 if (pcbddc->dbg_flag) { 1639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1641 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1642 } 1643 1644 /* loop on constraints and see whether or not they need a change of basis */ 1645 /* -> using implicit ordering contained in temp_indices data */ 1646 total_counts = pcbddc->n_vertices; 1647 primal_counter = total_counts; 1648 while (total_counts<pcbddc->local_primal_size) { 1649 primal_dofs = 1; 1650 if (PetscBTLookup(change_basis,total_counts)) { 1651 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1652 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1653 primal_dofs++; 1654 } 1655 /* get constraint info */ 1656 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1657 dual_dofs = size_of_constraint-primal_dofs; 1658 1659 /* copy quadrature constraints for change of basis check */ 1660 if (pcbddc->dbg_flag) { 1661 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1662 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1663 } 1664 1665 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1666 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1667 1668 /* compute QR decomposition of constraints */ 1669 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1670 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1671 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1672 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1673 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1674 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1675 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1676 1677 /* explictly compute R^-T */ 1678 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1679 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1680 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1681 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1683 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1684 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1685 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1686 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1687 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1688 1689 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1690 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1691 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1692 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1693 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1694 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1695 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1697 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1698 1699 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1700 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1701 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1706 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1708 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1709 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1710 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1711 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1712 1713 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1714 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1715 /* insert cols for primal dofs */ 1716 for (j=0;j<primal_dofs;j++) { 1717 start_vals = &qr_basis[j*size_of_constraint]; 1718 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1719 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 /* insert cols for dual dofs */ 1722 for (j=0,k=0;j<dual_dofs;k++) { 1723 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1724 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1725 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1726 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1727 j++; 1728 } 1729 } 1730 1731 /* check change of basis */ 1732 if (pcbddc->dbg_flag) { 1733 PetscInt ii,jj; 1734 PetscBool valid_qr=PETSC_TRUE; 1735 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1736 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1741 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1742 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1743 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1744 for (jj=0;jj<size_of_constraint;jj++) { 1745 for (ii=0;ii<primal_dofs;ii++) { 1746 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1747 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1748 } 1749 } 1750 if (!valid_qr) { 1751 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1752 for (jj=0;jj<size_of_constraint;jj++) { 1753 for (ii=0;ii<primal_dofs;ii++) { 1754 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1755 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1756 } 1757 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1758 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1759 } 1760 } 1761 } 1762 } else { 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1764 } 1765 } 1766 /* increment primal counter */ 1767 primal_counter += primal_dofs; 1768 } else { 1769 if (pcbddc->dbg_flag) { 1770 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1771 } 1772 } 1773 /* increment constraint counter total_counts */ 1774 total_counts += primal_dofs; 1775 } 1776 if (pcbddc->dbg_flag) { 1777 ierr = PetscFree(work);CHKERRQ(ierr); 1778 } 1779 /* free workspace */ 1780 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1781 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1782 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1783 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1784 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1785 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1786 } 1787 /* assembling */ 1788 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1789 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1790 /* 1791 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1792 */ 1793 } 1794 if (pcbddc->dbg_flag) { 1795 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1796 } 1797 /* free workspace */ 1798 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1799 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1800 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1801 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1802 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1803 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1804 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1805 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1811 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1812 { 1813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1814 PC_IS *pcis = (PC_IS*)pc->data; 1815 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1816 PetscInt bs,ierr,i,vertex_size; 1817 PetscViewer viewer=pcbddc->dbg_viewer; 1818 1819 PetscFunctionBegin; 1820 /* Init local Graph struct */ 1821 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1822 1823 /* Check validity of the csr graph passed in by the user */ 1824 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1825 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1826 } 1827 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1828 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1829 Mat mat_adj; 1830 const PetscInt *xadj,*adjncy; 1831 PetscBool flg_row=PETSC_TRUE; 1832 1833 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1834 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1835 if (!flg_row) { 1836 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1837 } 1838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1839 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1840 if (!flg_row) { 1841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1842 } 1843 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1844 } 1845 1846 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1847 vertex_size = 1; 1848 if (!pcbddc->n_ISForDofs) { 1849 IS *custom_ISForDofs; 1850 1851 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1852 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1853 for (i=0;i<bs;i++) { 1854 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1855 } 1856 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1857 /* remove my references to IS objects */ 1858 for (i=0;i<bs;i++) { 1859 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1860 } 1861 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1862 } else { /* mat block size as vertex size (used for elasticity) */ 1863 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1864 } 1865 1866 /* Setup of Graph */ 1867 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1868 1869 /* Graph's connected components analysis */ 1870 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1871 1872 /* print some info to stdout */ 1873 if (pcbddc->dbg_flag) { 1874 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1875 } 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1881 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1882 { 1883 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1884 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 n = 0; 1889 vertices = 0; 1890 if (pcbddc->ConstraintMatrix) { 1891 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1892 for (i=0;i<local_primal_size;i++) { 1893 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1894 if (size_of_constraint == 1) n++; 1895 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1896 } 1897 if (vertices_idx) { 1898 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1899 n = 0; 1900 for (i=0;i<local_primal_size;i++) { 1901 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1902 if (size_of_constraint == 1) { 1903 vertices[n++]=row_cmat_indices[0]; 1904 } 1905 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1906 } 1907 } 1908 } 1909 *n_vertices = n; 1910 if (vertices_idx) *vertices_idx = vertices; 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1916 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1917 { 1918 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1919 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1920 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1921 PetscBT touched; 1922 PetscErrorCode ierr; 1923 1924 /* This function assumes that the number of local constraints per connected component 1925 is not greater than the number of nodes defined for the connected component 1926 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1927 PetscFunctionBegin; 1928 n = 0; 1929 constraints_index = 0; 1930 if (pcbddc->ConstraintMatrix) { 1931 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1932 max_size_of_constraint = 0; 1933 for (i=0;i<local_primal_size;i++) { 1934 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1935 if (size_of_constraint > 1) { 1936 n++; 1937 } 1938 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1939 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1940 } 1941 if (constraints_idx) { 1942 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1943 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1944 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1945 n = 0; 1946 for (i=0;i<local_primal_size;i++) { 1947 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1948 if (size_of_constraint > 1) { 1949 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1950 /* find first untouched local node */ 1951 j = 0; 1952 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1953 min_index = row_cmat_global_indices[j]; 1954 min_loc = j; 1955 /* search the minimum among nodes not yet touched on the connected component 1956 since there can be more than one constraint on a single cc */ 1957 for (j=1;j<size_of_constraint;j++) { 1958 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1959 min_index = row_cmat_global_indices[j]; 1960 min_loc = j; 1961 } 1962 } 1963 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1964 constraints_index[n++] = row_cmat_indices[min_loc]; 1965 } 1966 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1967 } 1968 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1969 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1970 } 1971 } 1972 *n_constraints = n; 1973 if (constraints_idx) *constraints_idx = constraints_index; 1974 PetscFunctionReturn(0); 1975 } 1976 1977 /* the next two functions has been adapted from pcis.c */ 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "PCBDDCApplySchur" 1980 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 1981 { 1982 PetscErrorCode ierr; 1983 PC_IS *pcis = (PC_IS*)(pc->data); 1984 1985 PetscFunctionBegin; 1986 if (!vec2_B) { vec2_B = v; } 1987 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 1988 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 1989 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 1990 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 1991 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "PCBDDCApplySchurTranspose" 1997 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 1998 { 1999 PetscErrorCode ierr; 2000 PC_IS *pcis = (PC_IS*)(pc->data); 2001 2002 PetscFunctionBegin; 2003 if (!vec2_B) { vec2_B = v; } 2004 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2005 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2006 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2007 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2008 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "PCBDDCSubsetNumbering" 2014 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2015 { 2016 Vec local_vec,global_vec; 2017 IS seqis,paris; 2018 VecScatter scatter_ctx; 2019 PetscScalar *array; 2020 PetscInt *temp_global_dofs; 2021 PetscScalar globalsum; 2022 PetscInt i,j,s; 2023 PetscInt nlocals,first_index,old_index,max_local; 2024 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2025 PetscMPIInt *dof_sizes,*dof_displs; 2026 PetscBool first_found; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 /* mpi buffers */ 2031 MPI_Comm_size(comm,&size_prec_comm); 2032 MPI_Comm_rank(comm,&rank_prec_comm); 2033 j = ( !rank_prec_comm ? size_prec_comm : 0); 2034 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2035 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2036 /* get maximum size of subset */ 2037 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2038 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2039 max_local = 0; 2040 if (n_local_dofs) { 2041 max_local = temp_global_dofs[0]; 2042 for (i=1;i<n_local_dofs;i++) { 2043 if (max_local < temp_global_dofs[i] ) { 2044 max_local = temp_global_dofs[i]; 2045 } 2046 } 2047 } 2048 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2049 max_global++; 2050 max_local = 0; 2051 if (n_local_dofs) { 2052 max_local = local_dofs[0]; 2053 for (i=1;i<n_local_dofs;i++) { 2054 if (max_local < local_dofs[i] ) { 2055 max_local = local_dofs[i]; 2056 } 2057 } 2058 } 2059 max_local++; 2060 /* allocate workspace */ 2061 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2062 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2063 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2064 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2065 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2066 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2067 /* create scatter */ 2068 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2069 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2070 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2071 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2072 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2073 /* init array */ 2074 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2075 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2076 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2077 if (local_dofs_mult) { 2078 for (i=0;i<n_local_dofs;i++) { 2079 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2080 } 2081 } else { 2082 for (i=0;i<n_local_dofs;i++) { 2083 array[local_dofs[i]]=1.0; 2084 } 2085 } 2086 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2087 /* scatter into global vec and get total number of global dofs */ 2088 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2091 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2092 /* Fill global_vec with cumulative function for global numbering */ 2093 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2094 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2095 nlocals = 0; 2096 first_index = -1; 2097 first_found = PETSC_FALSE; 2098 for (i=0;i<s;i++) { 2099 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2100 first_found = PETSC_TRUE; 2101 first_index = i; 2102 } 2103 nlocals += (PetscInt)PetscRealPart(array[i]); 2104 } 2105 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2106 if (!rank_prec_comm) { 2107 dof_displs[0]=0; 2108 for (i=1;i<size_prec_comm;i++) { 2109 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2110 } 2111 } 2112 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2113 if (first_found) { 2114 array[first_index] += (PetscScalar)nlocals; 2115 old_index = first_index; 2116 for (i=first_index+1;i<s;i++) { 2117 if (PetscRealPart(array[i]) > 0.0) { 2118 array[i] += array[old_index]; 2119 old_index = i; 2120 } 2121 } 2122 } 2123 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2124 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2125 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2126 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2127 /* get global ordering of local dofs */ 2128 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2129 if (local_dofs_mult) { 2130 for (i=0;i<n_local_dofs;i++) { 2131 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2132 } 2133 } else { 2134 for (i=0;i<n_local_dofs;i++) { 2135 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2136 } 2137 } 2138 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2139 /* free workspace */ 2140 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2141 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2142 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2143 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2144 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2145 /* return pointer to global ordering of local dofs */ 2146 *global_numbering_subset = temp_global_dofs; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2152 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2153 { 2154 PetscInt i,j; 2155 PetscScalar *alphas; 2156 PetscErrorCode ierr; 2157 2158 PetscFunctionBegin; 2159 /* this implements stabilized Gram-Schmidt */ 2160 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2161 for (i=0;i<n;i++) { 2162 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2163 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2164 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2165 } 2166 ierr = PetscFree(alphas);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatConvert_IS_AIJ" 2173 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2174 { 2175 Mat new_mat; 2176 Mat_IS *matis = (Mat_IS*)(mat->data); 2177 /* info on mat */ 2178 PetscInt bs,rows,cols; 2179 PetscInt lrows,lcols; 2180 PetscInt local_rows,local_cols; 2181 PetscMPIInt nsubdomains; 2182 /* preallocation */ 2183 Vec vec_dnz,vec_onz; 2184 PetscScalar *my_dnz,*my_onz,*array; 2185 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2186 PetscInt index_row,index_col,owner; 2187 PetscInt *local_indices,*global_indices; 2188 /* work */ 2189 PetscInt i,j; 2190 PetscErrorCode ierr; 2191 2192 PetscFunctionBegin; 2193 /* CHECKS */ 2194 /* get info from mat */ 2195 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2196 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2197 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2198 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2199 2200 /* MAT_INITIAL_MATRIX */ 2201 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2202 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2203 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2204 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2205 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2206 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2207 2208 /* 2209 preallocation 2210 */ 2211 2212 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2213 /* 2214 Some vectors are needed to sum up properly on shared interface dofs. 2215 Note that preallocation is not exact, since it overestimates nonzeros 2216 */ 2217 /* 2218 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2219 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2220 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2221 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2222 */ 2223 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2224 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2225 /* All processes need to compute entire row ownership */ 2226 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2227 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2228 for (i=0;i<nsubdomains;i++) { 2229 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2230 row_ownership[j]=i; 2231 } 2232 } 2233 /* map indices of local mat to global values */ 2234 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2235 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2236 for (i=0;i<local_rows;i++) local_indices[i]=i; 2237 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2238 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2239 2240 /* 2241 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2242 then, they will be summed up properly. This way, preallocation is always sufficient 2243 */ 2244 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2245 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2246 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2247 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2248 for (i=0;i<local_rows;i++) { 2249 index_row = global_indices[i]; 2250 for (j=i;j<local_rows;j++) { 2251 owner = row_ownership[index_row]; 2252 index_col = global_indices[j]; 2253 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2254 my_dnz[i] += 1.0; 2255 } else { /* offdiag block */ 2256 my_onz[i] += 1.0; 2257 } 2258 /* same as before, interchanging rows and cols */ 2259 if (i != j) { 2260 owner = row_ownership[index_col]; 2261 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2262 my_dnz[j] += 1.0; 2263 } else { 2264 my_onz[j] += 1.0; 2265 } 2266 } 2267 } 2268 } 2269 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2270 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2271 if (local_rows) { /* multilevel guard */ 2272 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2273 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2274 } 2275 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2276 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2277 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2278 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2279 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2280 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2281 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2282 2283 /* set computed preallocation in dnz and onz */ 2284 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2285 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2286 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2287 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2288 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2289 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2290 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2291 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2292 2293 /* Resize preallocation if overestimated */ 2294 for (i=0;i<lrows;i++) { 2295 dnz[i] = PetscMin(dnz[i],lcols); 2296 onz[i] = PetscMin(onz[i],cols-lcols); 2297 } 2298 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2299 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2300 2301 /* 2302 Set values. Very Basic. 2303 */ 2304 for (i=0;i<local_rows;i++) { 2305 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2306 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2307 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2308 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2309 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2310 } 2311 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2313 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2314 2315 /* get back mat */ 2316 *M = new_mat; 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatISSubassemble_Private" 2322 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2323 { 2324 Mat subdomain_adj; 2325 IS new_ranks,ranks_send_to; 2326 MatPartitioning partitioner; 2327 Mat_IS *matis; 2328 PetscInt n_neighs,*neighs,*n_shared,**shared; 2329 PetscInt prank; 2330 PetscMPIInt size,rank,color; 2331 PetscInt *xadj,*adjncy,*oldranks; 2332 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2333 PetscInt i,j,n_subdomains,local_size,threshold=0; 2334 PetscErrorCode ierr; 2335 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2336 PetscSubcomm subcomm; 2337 2338 PetscFunctionBegin; 2339 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2340 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2341 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2342 2343 /* Get info on mapping */ 2344 matis = (Mat_IS*)(mat->data); 2345 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2346 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2347 2348 /* build local CSR graph of subdomains' connectivity */ 2349 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2350 xadj[0] = 0; 2351 xadj[1] = PetscMax(n_neighs-1,0); 2352 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2353 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2354 2355 if (threshold) { 2356 PetscInt* count,min_threshold; 2357 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2358 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2359 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2360 for (j=0;j<n_shared[i];j++) { 2361 count[shared[i][j]] += 1; 2362 } 2363 } 2364 /* adapt threshold since we dont want to lose significant connections */ 2365 min_threshold = n_neighs; 2366 for (i=1;i<n_neighs;i++) { 2367 for (j=0;j<n_shared[i];j++) { 2368 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2369 } 2370 } 2371 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2372 threshold = PetscMax(min_threshold+1,threshold); 2373 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2374 xadj[1] = 0; 2375 for (i=1;i<n_neighs;i++) { 2376 for (j=0;j<n_shared[i];j++) { 2377 if (count[shared[i][j]] < threshold) { 2378 adjncy[xadj[1]] = neighs[i]; 2379 adjncy_wgt[xadj[1]] = n_shared[i]; 2380 xadj[1]++; 2381 break; 2382 } 2383 } 2384 } 2385 ierr = PetscFree(count);CHKERRQ(ierr); 2386 } else { 2387 if (xadj[1]) { 2388 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2389 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2390 } 2391 } 2392 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2393 if (use_square) { 2394 for (i=0;i<xadj[1];i++) { 2395 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2396 } 2397 } 2398 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2399 2400 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2401 2402 /* 2403 Restrict work on active processes only. 2404 */ 2405 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2406 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2407 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2408 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2409 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2410 if (color) { 2411 ierr = PetscFree(xadj);CHKERRQ(ierr); 2412 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2413 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2414 } else { 2415 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2416 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2417 prank = rank; 2418 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2419 for (i=0;i<size;i++) { 2420 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2421 } 2422 for (i=0;i<xadj[1];i++) { 2423 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2424 } 2425 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2426 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2427 n_subdomains = (PetscInt)size; 2428 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2429 2430 /* Partition */ 2431 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2432 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2433 if (use_vwgt) { 2434 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2435 v_wgt[0] = local_size; 2436 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2437 } 2438 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2439 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2440 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2441 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2442 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2443 2444 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2445 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2446 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2447 /* clean up */ 2448 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2449 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2450 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2451 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2452 } 2453 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2454 2455 /* assemble parallel IS for sends */ 2456 i = 1; 2457 if (color) i=0; 2458 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2459 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2460 2461 /* get back IS */ 2462 *is_sends = ranks_send_to; 2463 PetscFunctionReturn(0); 2464 } 2465 2466 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatISSubassemble" 2470 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2471 { 2472 Mat local_mat,new_mat; 2473 Mat_IS *matis; 2474 IS is_sends_internal; 2475 PetscInt rows,cols; 2476 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2477 PetscBool ismatis,isdense; 2478 ISLocalToGlobalMapping l2gmap; 2479 PetscInt* l2gmap_indices; 2480 const PetscInt* is_indices; 2481 MatType new_local_type; 2482 MatTypePrivate new_local_type_private; 2483 /* buffers */ 2484 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2485 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2486 /* MPI */ 2487 MPI_Comm comm; 2488 PetscMPIInt n_sends,n_recvs,commsize; 2489 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2490 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2491 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2492 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 /* checks */ 2497 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2498 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2499 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2500 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2501 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2502 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2503 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2504 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2505 PetscValidLogicalCollectiveInt(mat,bs,0); 2506 /* prepare IS for sending if not provided */ 2507 if (!is_sends) { 2508 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2509 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2510 } else { 2511 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2512 is_sends_internal = is_sends; 2513 } 2514 2515 /* get pointer of MATIS data */ 2516 matis = (Mat_IS*)mat->data; 2517 2518 /* get comm */ 2519 comm = PetscObjectComm((PetscObject)mat); 2520 2521 /* compute number of sends */ 2522 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2523 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2524 2525 /* compute number of receives */ 2526 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2527 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2528 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2529 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2530 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2531 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2532 ierr = PetscFree(iflags);CHKERRQ(ierr); 2533 2534 /* prepare send/receive buffers */ 2535 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2536 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2537 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2538 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2539 2540 /* Get data from local mat */ 2541 if (!isdense) { 2542 /* TODO: See below some guidelines on how to prepare the local buffers */ 2543 /* 2544 send_buffer_vals should contain the raw values of the local matrix 2545 send_buffer_idxs should contain: 2546 - MatType_PRIVATE type 2547 - PetscInt size_of_l2gmap 2548 - PetscInt global_row_indices[size_of_l2gmap] 2549 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2550 */ 2551 } else { 2552 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2553 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2554 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2555 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2556 send_buffer_idxs[1] = i; 2557 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2558 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2559 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2560 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2561 for (i=0;i<n_sends;i++) { 2562 ilengths_vals[is_indices[i]] = len*len; 2563 ilengths_idxs[is_indices[i]] = len+2; 2564 } 2565 } 2566 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2567 buf_size_idxs = 0; 2568 buf_size_vals = 0; 2569 for (i=0;i<n_recvs;i++) { 2570 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2571 buf_size_vals += (PetscInt)olengths_vals[i]; 2572 } 2573 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2574 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2575 2576 /* get new tags for clean communications */ 2577 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2578 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2579 2580 /* allocate for requests */ 2581 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2582 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2583 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2584 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2585 2586 /* communications */ 2587 ptr_idxs = recv_buffer_idxs; 2588 ptr_vals = recv_buffer_vals; 2589 for (i=0;i<n_recvs;i++) { 2590 source_dest = onodes[i]; 2591 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2592 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2593 ptr_idxs += olengths_idxs[i]; 2594 ptr_vals += olengths_vals[i]; 2595 } 2596 for (i=0;i<n_sends;i++) { 2597 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2598 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2599 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2600 } 2601 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2602 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2603 2604 /* assemble new l2g map */ 2605 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2606 ptr_idxs = recv_buffer_idxs; 2607 buf_size_idxs = 0; 2608 for (i=0;i<n_recvs;i++) { 2609 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2610 ptr_idxs += olengths_idxs[i]; 2611 } 2612 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2613 ptr_idxs = recv_buffer_idxs; 2614 buf_size_idxs = 0; 2615 for (i=0;i<n_recvs;i++) { 2616 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2617 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2618 ptr_idxs += olengths_idxs[i]; 2619 } 2620 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2621 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2622 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2623 2624 /* infer new local matrix type from received local matrices type */ 2625 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2626 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2627 new_local_type_private = MATAIJ_PRIVATE; 2628 new_local_type = MATSEQAIJ; 2629 if (n_recvs) { 2630 new_local_type_private = send_buffer_idxs[0]; 2631 ptr_idxs = recv_buffer_idxs; 2632 for (i=0;i<n_recvs;i++) { 2633 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2634 new_local_type_private = MATAIJ_PRIVATE; 2635 break; 2636 } 2637 ptr_idxs += olengths_idxs[i]; 2638 } 2639 switch (new_local_type_private) { 2640 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2641 new_local_type = MATSEQAIJ; 2642 bs = 1; 2643 break; 2644 case MATAIJ_PRIVATE: 2645 new_local_type = MATSEQAIJ; 2646 bs = 1; 2647 break; 2648 case MATBAIJ_PRIVATE: 2649 new_local_type = MATSEQBAIJ; 2650 break; 2651 case MATSBAIJ_PRIVATE: 2652 new_local_type = MATSEQSBAIJ; 2653 break; 2654 default: 2655 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2656 break; 2657 } 2658 } 2659 2660 /* create MATIS object */ 2661 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2662 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2663 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2664 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2665 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2666 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2667 2668 /* set values */ 2669 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2670 ptr_vals = recv_buffer_vals; 2671 ptr_idxs = recv_buffer_idxs; 2672 for (i=0;i<n_recvs;i++) { 2673 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2674 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2675 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2676 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2677 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2678 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2679 } 2680 ptr_idxs += olengths_idxs[i]; 2681 ptr_vals += olengths_vals[i]; 2682 } 2683 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2685 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2687 2688 { /* check */ 2689 Vec lvec,rvec; 2690 PetscReal infty_error; 2691 2692 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2693 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2694 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2695 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2696 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2697 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2698 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2699 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2700 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2701 } 2702 2703 /* free workspace */ 2704 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2705 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2706 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2707 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2708 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2709 if (isdense) { 2710 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2711 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2712 } else { 2713 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2714 } 2715 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2716 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2717 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2718 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2719 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2720 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2721 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2722 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2723 ierr = PetscFree(onodes);CHKERRQ(ierr); 2724 /* get back new mat */ 2725 *mat_n = new_mat; 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2731 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2732 { 2733 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2734 PC_IS *pcis = (PC_IS*)pc->data; 2735 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2736 MatNullSpace CoarseNullSpace=NULL; 2737 ISLocalToGlobalMapping coarse_islg; 2738 IS coarse_is; 2739 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2740 PetscInt im_active=-1,active_procs=-1; 2741 PC pc_temp; 2742 PCType coarse_pc_type; 2743 KSPType coarse_ksp_type; 2744 PetscBool multilevel_requested,multilevel_allowed; 2745 PetscBool setsym,issym,isbddc,isnn; 2746 MatStructure matstruct; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 /* Assign global numbering to coarse dofs */ 2751 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2752 2753 /* infer some info from user */ 2754 issym = PETSC_FALSE; 2755 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2756 multilevel_allowed = PETSC_FALSE; 2757 multilevel_requested = PETSC_FALSE; 2758 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2759 if (multilevel_requested) { 2760 /* count "active processes" */ 2761 im_active = !!(pcis->n); 2762 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2763 if (active_procs/pcbddc->coarsening_ratio < 2) { 2764 multilevel_allowed = PETSC_FALSE; 2765 } else { 2766 multilevel_allowed = PETSC_TRUE; 2767 } 2768 } 2769 2770 /* set defaults for coarse KSP and PC */ 2771 if (multilevel_allowed) { 2772 if (issym) { 2773 coarse_ksp_type = KSPRICHARDSON; 2774 } else { 2775 coarse_ksp_type = KSPCHEBYSHEV; 2776 } 2777 coarse_pc_type = PCBDDC; 2778 } else { 2779 coarse_ksp_type = KSPPREONLY; 2780 coarse_pc_type = PCREDUNDANT; 2781 } 2782 2783 /* create the coarse KSP object only once with defaults */ 2784 if (!pcbddc->coarse_ksp) { 2785 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2786 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2787 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2788 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2789 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2790 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2791 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2792 } 2793 /* allow user customization */ 2794 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2795 2796 /* get some info after set from options */ 2797 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2798 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2799 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2800 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 2801 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2802 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2803 isbddc = PETSC_FALSE; 2804 } 2805 2806 /* propagate BDDC info to the next level */ 2807 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2808 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2809 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2810 2811 /* creates temporary MATIS object for coarse matrix */ 2812 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2813 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2814 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2815 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2817 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2820 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2821 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2822 2823 /* assemble coarse matrix */ 2824 if (isbddc || isnn) { 2825 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2826 } else { 2827 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2828 } 2829 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2830 2831 /* create local to global scatters for coarse problem */ 2832 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2833 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2834 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2835 2836 /* propagate symmetry info to coarse matrix */ 2837 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2838 2839 /* Compute coarse null space (special handling by BDDC only) */ 2840 if (pcbddc->NullSpace) { 2841 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2842 if (isbddc) { 2843 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2844 } else { 2845 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2846 } 2847 } 2848 2849 /* set operators */ 2850 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 2851 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2852 2853 /* additional KSP customization */ 2854 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2855 if (max_it < 5) { 2856 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2857 } 2858 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 2859 2860 2861 /* print some info if requested */ 2862 if (pcbddc->dbg_flag) { 2863 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 2864 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 2865 if (!multilevel_allowed) { 2866 if (multilevel_requested) { 2867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2868 } else if (pcbddc->max_levels) { 2869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 2870 } 2871 } 2872 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level);CHKERRQ(ierr); 2873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2874 } 2875 2876 /* setup coarse ksp */ 2877 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2878 if (pcbddc->dbg_flag) { 2879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 2880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2881 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 2882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2883 } 2884 2885 /* HACK: TODO see if it could be avoided */ 2886 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 2887 2888 /* Check coarse problem if requested */ 2889 if (pcbddc->dbg_flag) { 2890 KSP check_ksp; 2891 KSPType check_ksp_type; 2892 PC check_pc; 2893 Vec check_vec; 2894 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 2895 PetscInt its; 2896 PetscBool ispreonly,compute; 2897 2898 /* Create ksp object suitable for estimation of extreme eigenvalues */ 2899 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 2900 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2901 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 2902 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 2903 if (ispreonly) { 2904 check_ksp_type = KSPPREONLY; 2905 compute = PETSC_FALSE; 2906 } else { 2907 if (issym) check_ksp_type = KSPCG; 2908 else check_ksp_type = KSPGMRES; 2909 compute = PETSC_TRUE; 2910 } 2911 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 2912 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 2913 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 2914 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 2915 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 2916 /* create random vec */ 2917 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 2918 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 2919 if (CoarseNullSpace) { 2920 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 2921 } 2922 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2923 /* solve coarse problem */ 2924 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2925 if (CoarseNullSpace) { 2926 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 2927 } 2928 /* check coarse problem residual error */ 2929 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 2930 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2931 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2932 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 2933 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 2934 /* get eigenvalue estimation if preonly has not been requested */ 2935 if (!ispreonly) { 2936 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2937 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 2938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 2939 } 2940 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 2941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 2942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2943 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 2944 } 2945 /* free memory */ 2946 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 2947 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 2953 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 2954 { 2955 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2956 PC_IS* pcis = (PC_IS*)pc->data; 2957 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2958 PetscInt i,j,coarse_size; 2959 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 /* get indices in local ordering for vertices and constraints */ 2964 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2965 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2966 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2967 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2968 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2969 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2970 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2971 2972 /* Compute global number of coarse dofs */ 2973 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2974 2975 /* check numbering */ 2976 if (pcbddc->dbg_flag) { 2977 PetscScalar coarsesum,*array; 2978 2979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2980 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 2982 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2983 for (i=0;i<pcbddc->local_primal_size;i++) { 2984 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2985 } 2986 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2987 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2988 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2989 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2990 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2991 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2992 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2993 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2994 for (i=0;i<pcis->n;i++) { 2995 if (array[i] == 1.0) { 2996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 2997 } 2998 } 2999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3000 for (i=0;i<pcis->n;i++) { 3001 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3002 } 3003 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3004 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3005 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3006 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3007 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3008 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3009 if (pcbddc->dbg_flag > 1) { 3010 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3011 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3013 for (i=0;i<pcbddc->local_primal_size;i++) { 3014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3015 } 3016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3017 } 3018 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3019 } 3020 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3021 /* get back data */ 3022 *coarse_size_n = coarse_size; 3023 *local_primal_indices_n = local_primal_indices; 3024 PetscFunctionReturn(0); 3025 } 3026 3027