1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 126 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 127 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 128 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 129 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 130 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 131 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 132 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 133 if (n_constraints) { 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 144 { 145 PetscErrorCode ierr; 146 /* pointers to pcis and pcbddc */ 147 PC_IS* pcis = (PC_IS*)pc->data; 148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 149 /* submatrices of local problem */ 150 Mat A_RV,A_VR,A_VV; 151 /* working matrices */ 152 Mat M1,M2,M3,C_CR; 153 /* working vectors */ 154 Vec vec1_C,vec2_C,vec1_V,vec2_V; 155 /* additional working stuff */ 156 IS is_aux; 157 ISLocalToGlobalMapping BtoNmap; 158 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 159 const PetscScalar *array,*row_cmat_values; 160 const PetscInt *row_cmat_indices,*idx_R_local; 161 PetscInt *vertices,*idx_V_B,*auxindices; 162 PetscInt n_vertices,n_constraints,size_of_constraint; 163 PetscInt i,j,n_R,n_D,n_B; 164 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 165 /* Vector and matrix types */ 166 VecType impVecType; 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 184 185 /* Allocating some extra storage just to be safe */ 186 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 187 for (i=0;i<pcis->n;i++) auxindices[i]=i; 188 189 /* vertices in boundary numbering */ 190 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 191 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 192 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 194 if (i != n_vertices) { 195 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 196 } 197 198 /* some work vectors on vertices and/or constraints */ 199 if (n_vertices) { 200 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 201 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 202 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 203 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 204 } 205 if (n_constraints) { 206 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 208 } 209 210 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 211 if (n_constraints) { 212 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 213 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 214 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 215 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 216 217 /* Extract constraints on R nodes: C_{CR} */ 218 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 220 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 221 222 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 223 for (i=0;i<n_constraints;i++) { 224 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 225 /* Get row of constraint matrix in R numbering */ 226 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 227 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 229 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 230 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 231 /* Solve for row of constraint matrix in R numbering */ 232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 233 /* Set values in local_auxmat2 */ 234 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 235 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 237 } 238 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 241 242 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 243 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 244 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 245 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 246 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 247 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(M1);CHKERRQ(ierr); 249 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 250 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 251 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 252 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 254 ierr = MatDestroy(&M2);CHKERRQ(ierr); 255 ierr = MatDestroy(&M3);CHKERRQ(ierr); 256 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 257 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 258 } 259 260 /* Get submatrices from subdomain matrix */ 261 if (n_vertices) { 262 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 266 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 267 } 268 269 /* Matrix of coarse basis functions (local) */ 270 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 271 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 272 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 273 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 274 if (pcbddc->switch_static || pcbddc->dbg_flag) { 275 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 276 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 277 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 279 } 280 281 if (pcbddc->dbg_flag) { 282 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 283 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 284 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 285 } 286 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 287 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 288 289 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 290 291 /* vertices */ 292 for (i=0;i<n_vertices;i++) { 293 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 294 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 295 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 296 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 297 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 298 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 299 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 300 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 301 if (n_constraints) { 302 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 303 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 304 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 305 } 306 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 307 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 308 309 /* Set values in coarse basis function and subdomain part of coarse_mat */ 310 /* coarse basis functions */ 311 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 312 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 313 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 314 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 315 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 317 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 318 if (pcbddc->switch_static || pcbddc->dbg_flag) { 319 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 320 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 322 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 323 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 324 } 325 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 326 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 327 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 328 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 329 if (n_constraints) { 330 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 331 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 332 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 333 } 334 335 /* check */ 336 if (pcbddc->dbg_flag) { 337 /* assemble subdomain vector on local nodes */ 338 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 340 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 342 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 343 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 344 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 345 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 346 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 356 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 357 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 358 /* check saddle point solution */ 359 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 360 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 361 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 362 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 363 /* shift by the identity matrix */ 364 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 365 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 366 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 367 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 368 } 369 } 370 371 /* constraints */ 372 for (i=0;i<n_constraints;i++) { 373 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 374 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 376 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 377 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 378 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 379 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 381 if (n_vertices) { 382 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 383 } 384 /* Set values in coarse basis function and subdomain part of coarse_mat */ 385 /* coarse basis functions */ 386 j = i+n_vertices; /* don't touch this! */ 387 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 388 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 389 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 390 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 391 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 392 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 393 if (pcbddc->switch_static || pcbddc->dbg_flag) { 394 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 399 } 400 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 401 if (n_vertices) { 402 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 403 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 405 } 406 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 407 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 409 410 if (pcbddc->dbg_flag) { 411 /* assemble subdomain vector on nodes */ 412 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 414 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* assemble subdomain vector of lagrange multipliers */ 419 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 420 if (n_vertices) { 421 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 } 425 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 430 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 431 /* check saddle point solution */ 432 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 433 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 434 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 435 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 436 /* shift by the identity matrix */ 437 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 441 } 442 } 443 /* call assembling routines for local coarse basis */ 444 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 445 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 447 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 448 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 449 } 450 451 /* compute other basis functions for non-symmetric problems */ 452 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 453 if (!setsym || (setsym && !issym)) { 454 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 455 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 456 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 457 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 458 if (pcbddc->switch_static || pcbddc->dbg_flag) { 459 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 460 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 461 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 462 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 463 } 464 for (i=0;i<pcbddc->local_primal_size;i++) { 465 if (n_constraints) { 466 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 467 for (j=0;j<n_constraints;j++) { 468 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 469 } 470 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 471 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 472 } 473 if (i<n_vertices) { 474 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 475 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 477 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 478 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 479 if (n_constraints) { 480 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 481 } 482 } else { 483 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 484 } 485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 487 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 490 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 492 if (i<n_vertices) { 493 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 494 } 495 if (pcbddc->switch_static || pcbddc->dbg_flag) { 496 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 498 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 499 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 500 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 501 } 502 503 if (pcbddc->dbg_flag) { 504 /* assemble subdomain vector on nodes */ 505 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 509 if (i<n_vertices) { 510 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 511 } 512 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 513 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 514 /* assemble subdomain vector of lagrange multipliers */ 515 for (j=0;j<pcbddc->local_primal_size;j++) { 516 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 517 } 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 /* check saddle point solution */ 521 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 522 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 524 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 525 /* shift by the identity matrix */ 526 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 527 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 530 } 531 } 532 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 535 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 } 538 } 539 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 540 /* Checking coarse_sub_mat and coarse basis functios */ 541 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 542 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 543 if (pcbddc->dbg_flag) { 544 Mat coarse_sub_mat; 545 Mat AUXMAT,TM1,TM2,TM3,TM4; 546 Mat coarse_phi_D,coarse_phi_B; 547 Mat coarse_psi_D,coarse_psi_B; 548 Mat A_II,A_BB,A_IB,A_BI; 549 MatType checkmattype=MATSEQAIJ; 550 PetscReal real_value; 551 552 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 553 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 554 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 555 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 556 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 557 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 558 if (pcbddc->coarse_psi_B) { 559 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 560 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 561 } 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 563 564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 567 if (pcbddc->coarse_psi_B) { 568 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 569 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 570 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 571 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 572 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 573 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 574 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 575 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 576 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 577 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 578 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 579 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 580 } else { 581 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 582 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 583 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 584 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 585 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 586 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 } 590 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 591 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 592 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 593 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 594 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 595 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 600 for (i=0;i<pcbddc->local_primal_size;i++) { 601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 602 } 603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 604 for (i=0;i<pcbddc->local_primal_size;i++) { 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 606 } 607 if (pcbddc->coarse_psi_B) { 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 609 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 611 } 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 613 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 615 } 616 } 617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 618 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 619 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 620 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 621 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 622 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 623 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 624 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 625 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 626 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 627 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 628 if (pcbddc->coarse_psi_B) { 629 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 630 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 631 } 632 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 634 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 635 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 636 } 637 /* free memory */ 638 if (n_vertices) { 639 ierr = PetscFree(vertices);CHKERRQ(ierr); 640 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 641 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 643 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 645 } 646 if (n_constraints) { 647 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 651 } 652 ierr = PetscFree(auxindices);CHKERRQ(ierr); 653 /* get back data */ 654 *coarse_submat_vals_n = coarse_submat_vals; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 661 { 662 PC_IS* pcis = (PC_IS*)(pc->data); 663 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 664 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 665 /* manage repeated solves */ 666 MatReuse reuse; 667 MatStructure matstruct; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 /* get mat flags */ 672 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 673 reuse = MAT_INITIAL_MATRIX; 674 if (pc->setupcalled) { 675 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 676 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 677 if (matstruct == SAME_NONZERO_PATTERN) { 678 reuse = MAT_REUSE_MATRIX; 679 } else { 680 reuse = MAT_INITIAL_MATRIX; 681 } 682 } 683 if (reuse == MAT_INITIAL_MATRIX) { 684 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 689 } 690 691 /* transform local matrices if needed */ 692 if (pcbddc->use_change_of_basis) { 693 Mat change_mat_all; 694 PetscScalar *row_cmat_values; 695 PetscInt *row_cmat_indices; 696 PetscInt *nnz,*is_indices,*temp_indices; 697 PetscInt i,j,k,n_D,n_B; 698 699 /* Get Non-overlapping dimensions */ 700 n_B = pcis->n_B; 701 n_D = pcis->n-n_B; 702 703 /* compute nonzero structure of change of basis on all local nodes */ 704 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 705 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 706 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 707 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 708 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 709 k=1; 710 for (i=0;i<n_B;i++) { 711 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 712 nnz[is_indices[i]]=j; 713 if (k < j) k = j; 714 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 715 } 716 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 /* assemble change of basis matrix on the whole set of local dofs */ 718 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 719 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 720 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 721 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 722 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) { 725 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 726 } 727 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 for (i=0;i<n_B;i++) { 730 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 731 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 732 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 733 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 734 } 735 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 738 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 739 if (i==1) { 740 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 741 } else { 742 Mat work_mat; 743 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 744 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 745 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 746 } 747 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 748 ierr = PetscFree(nnz);CHKERRQ(ierr); 749 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 750 } else { 751 /* without change of basis, the local matrix is unchanged */ 752 if (!pcbddc->local_mat) { 753 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 754 pcbddc->local_mat = matis->A; 755 } 756 } 757 758 /* get submatrices */ 759 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 760 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 761 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 762 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 769 { 770 PC_IS* pcis = (PC_IS*)(pc->data); 771 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 772 IS is_aux1,is_aux2; 773 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 774 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 775 PetscBT bitmask; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 /* Set Non-overlapping dimensions */ 780 n_B = pcis->n_B; n_D = pcis->n - n_B; 781 /* get vertex indices from constraint matrix */ 782 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 783 /* Set number of constraints */ 784 n_constraints = pcbddc->local_primal_size-n_vertices; 785 /* create auxiliary bitmask */ 786 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 787 for (i=0;i<n_vertices;i++) { 788 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 789 } 790 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 791 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 792 for (i=0, n_R=0; i<pcis->n; i++) { 793 if (!PetscBTLookup(bitmask,i)) { 794 idx_R_local[n_R] = i; 795 n_R++; 796 } 797 } 798 ierr = PetscFree(vertices);CHKERRQ(ierr); 799 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 800 801 /* print some info if requested */ 802 if (pcbddc->dbg_flag) { 803 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 810 } 811 812 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 813 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 814 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 815 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 for (i=0; i<n_D; i++) { 817 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 818 } 819 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0, j=0; i<n_R; i++) { 821 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 822 aux_array1[j++] = i; 823 } 824 } 825 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 826 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 827 for (i=0, j=0; i<n_B; i++) { 828 if (!PetscBTLookup(bitmask,is_indices[i])) { 829 aux_array2[j++] = i; 830 } 831 } 832 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 834 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 835 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 836 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 840 for (i=0, j=0; i<n_R; i++) { 841 if (PetscBTLookup(bitmask,idx_R_local[i])) { 842 aux_array1[j++] = i; 843 } 844 } 845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 848 } 849 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 856 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 857 { 858 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 859 PC_IS *pcis = (PC_IS*)pc->data; 860 PC pc_temp; 861 Mat A_RR; 862 Vec vec1,vec2,vec3; 863 MatStructure matstruct; 864 PetscScalar m_one = -1.0; 865 PetscReal value; 866 PetscInt n_D,n_R; 867 PetscBool use_exact,use_exact_reduced; 868 PetscErrorCode ierr; 869 /* prefixes stuff */ 870 char dir_prefix[256],neu_prefix[256],str_level[3]; 871 size_t len; 872 873 PetscFunctionBegin; 874 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 875 876 /* compute prefixes */ 877 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 878 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 879 if (!pcbddc->current_level) { 880 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 881 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 882 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 883 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 884 } else { 885 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 886 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 887 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 888 len -= 15; /* remove "pc_bddc_coarse_" */ 889 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 890 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 891 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 892 *(dir_prefix+len)='\0'; 893 *(neu_prefix+len)='\0'; 894 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 895 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 896 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 897 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 898 } 899 900 /* DIRICHLET PROBLEM */ 901 /* Matrix for Dirichlet problem is pcis->A_II */ 902 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 903 if (!pcbddc->ksp_D) { /* create object if not yet build */ 904 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 905 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 906 /* default */ 907 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 908 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 909 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 910 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 911 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 912 } 913 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 914 /* Allow user's customization */ 915 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 916 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 917 if (!n_D) { 918 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 919 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 920 } 921 /* Set Up KSP for Dirichlet problem of BDDC */ 922 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 923 /* set ksp_D into pcis data */ 924 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 925 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 926 pcis->ksp_D = pcbddc->ksp_D; 927 928 /* NEUMANN PROBLEM */ 929 /* Matrix for Neumann problem is A_RR -> we need to create it */ 930 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 931 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 932 if (!pcbddc->ksp_R) { /* create object if not yet build */ 933 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 934 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 935 /* default */ 936 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 937 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 938 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 939 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 940 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 941 } 942 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 943 /* Allow user's customization */ 944 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 945 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 946 if (!n_R) { 947 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 948 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 949 } 950 /* Set Up KSP for Neumann problem of BDDC */ 951 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 952 953 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 954 955 /* Dirichlet */ 956 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 957 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 958 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 959 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 960 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 961 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 962 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 963 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 964 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 965 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 966 /* need to be adapted? */ 967 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 968 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 969 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 970 /* print info */ 971 if (pcbddc->dbg_flag) { 972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 973 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 977 } 978 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 979 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 980 } 981 982 /* Neumann */ 983 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 984 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 985 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 986 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 987 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 988 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 989 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 990 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 991 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 992 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 993 /* need to be adapted? */ 994 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 995 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 996 /* print info */ 997 if (pcbddc->dbg_flag) { 998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1000 } 1001 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1002 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1003 } 1004 /* free Neumann problem's matrix */ 1005 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1011 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1012 { 1013 PetscErrorCode ierr; 1014 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1015 1016 PetscFunctionBegin; 1017 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1018 if (pcbddc->local_auxmat1) { 1019 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1020 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1027 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1028 { 1029 PetscErrorCode ierr; 1030 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1031 PC_IS* pcis = (PC_IS*) (pc->data); 1032 const PetscScalar zero = 0.0; 1033 1034 PetscFunctionBegin; 1035 /* Application of PHI^T (or PSI^T) */ 1036 if (pcbddc->coarse_psi_B) { 1037 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1038 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1039 } else { 1040 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1041 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1042 } 1043 /* Scatter data of coarse_rhs */ 1044 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1045 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 1047 /* Local solution on R nodes */ 1048 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1049 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1050 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 if (pcbddc->switch_static) { 1052 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1053 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1054 } 1055 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1056 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1057 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 if (pcbddc->switch_static) { 1060 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 } 1063 1064 /* Coarse solution */ 1065 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1066 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1067 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1068 } 1069 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1070 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1071 1072 /* Sum contributions from two levels */ 1073 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1074 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1080 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1081 { 1082 PetscErrorCode ierr; 1083 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1084 1085 PetscFunctionBegin; 1086 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1093 { 1094 PetscErrorCode ierr; 1095 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1096 1097 PetscFunctionBegin; 1098 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 /* uncomment for testing purposes */ 1103 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1106 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1107 { 1108 PetscErrorCode ierr; 1109 PC_IS* pcis = (PC_IS*)(pc->data); 1110 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1111 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1112 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1113 MatType impMatType=MATSEQAIJ; 1114 /* one and zero */ 1115 PetscScalar one=1.0,zero=0.0; 1116 /* space to store constraints and their local indices */ 1117 PetscScalar *temp_quadrature_constraint; 1118 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1119 /* iterators */ 1120 PetscInt i,j,k,total_counts,temp_start_ptr; 1121 /* stuff to store connected components stored in pcbddc->mat_graph */ 1122 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1123 PetscInt n_ISForFaces,n_ISForEdges; 1124 /* near null space stuff */ 1125 MatNullSpace nearnullsp; 1126 const Vec *nearnullvecs; 1127 Vec *localnearnullsp; 1128 PetscBool nnsp_has_cnst; 1129 PetscInt nnsp_size; 1130 PetscScalar *array; 1131 /* BLAS integers */ 1132 PetscBLASInt lwork,lierr; 1133 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1134 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1135 /* LAPACK working arrays for SVD or POD */ 1136 PetscBool skip_lapack; 1137 PetscScalar *work; 1138 PetscReal *singular_vals; 1139 #if defined(PETSC_USE_COMPLEX) 1140 PetscReal *rwork; 1141 #endif 1142 #if defined(PETSC_MISSING_LAPACK_GESVD) 1143 PetscBLASInt Blas_one_2=1; 1144 PetscScalar *temp_basis,*correlation_mat; 1145 #else 1146 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1147 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1148 #endif 1149 /* change of basis */ 1150 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1151 PetscBool boolforchange; 1152 PetscBT touched,change_basis; 1153 /* auxiliary stuff */ 1154 PetscInt *nnz,*is_indices,*local_to_B; 1155 /* some quantities */ 1156 PetscInt n_vertices,total_primal_vertices; 1157 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1158 1159 1160 PetscFunctionBegin; 1161 /* Get index sets for faces, edges and vertices from graph */ 1162 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1163 pcbddc->use_vertices = PETSC_TRUE; 1164 } 1165 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1166 /* HACK: provide functions to set change of basis */ 1167 if (!ISForVertices && pcbddc->NullSpace) { 1168 pcbddc->use_change_of_basis = PETSC_TRUE; 1169 pcbddc->use_change_on_faces = PETSC_FALSE; 1170 } 1171 /* print some info */ 1172 if (pcbddc->dbg_flag) { 1173 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1174 i = 0; 1175 if (ISForVertices) { 1176 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1177 } 1178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 /* check if near null space is attached to global mat */ 1184 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1185 if (nearnullsp) { 1186 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1187 } else { /* if near null space is not provided BDDC uses constants by default */ 1188 nnsp_size = 0; 1189 nnsp_has_cnst = PETSC_TRUE; 1190 } 1191 /* get max number of constraints on a single cc */ 1192 max_constraints = nnsp_size; 1193 if (nnsp_has_cnst) max_constraints++; 1194 1195 /* 1196 Evaluate maximum storage size needed by the procedure 1197 - temp_indices will contain start index of each constraint stored as follows 1198 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1199 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1200 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1201 */ 1202 total_counts = n_ISForFaces+n_ISForEdges; 1203 total_counts *= max_constraints; 1204 n_vertices = 0; 1205 if (ISForVertices) { 1206 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1207 } 1208 total_counts += n_vertices; 1209 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1210 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1211 total_counts = 0; 1212 max_size_of_constraint = 0; 1213 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1214 if (i<n_ISForEdges) { 1215 used_IS = &ISForEdges[i]; 1216 } else { 1217 used_IS = &ISForFaces[i-n_ISForEdges]; 1218 } 1219 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1220 total_counts += j; 1221 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1222 } 1223 total_counts *= max_constraints; 1224 total_counts += n_vertices; 1225 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1226 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1227 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1228 /* local to boundary numbering */ 1229 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1230 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1231 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1232 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1233 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1234 /* get local part of global near null space vectors */ 1235 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1236 for (k=0;k<nnsp_size;k++) { 1237 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1238 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 } 1241 1242 /* whether or not to skip lapack calls */ 1243 skip_lapack = PETSC_TRUE; 1244 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1245 1246 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1247 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1248 PetscScalar temp_work; 1249 #if defined(PETSC_MISSING_LAPACK_GESVD) 1250 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1251 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1252 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1253 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1254 #if defined(PETSC_USE_COMPLEX) 1255 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1256 #endif 1257 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1258 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1259 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1260 lwork = -1; 1261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1262 #if !defined(PETSC_USE_COMPLEX) 1263 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1264 #else 1265 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1266 #endif 1267 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1268 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1269 #else /* on missing GESVD */ 1270 /* SVD */ 1271 PetscInt max_n,min_n; 1272 max_n = max_size_of_constraint; 1273 min_n = max_constraints; 1274 if (max_size_of_constraint < max_constraints) { 1275 min_n = max_size_of_constraint; 1276 max_n = max_constraints; 1277 } 1278 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1279 #if defined(PETSC_USE_COMPLEX) 1280 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1281 #endif 1282 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1283 lwork = -1; 1284 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1285 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1286 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1287 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1288 #if !defined(PETSC_USE_COMPLEX) 1289 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1290 #else 1291 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1292 #endif 1293 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1294 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1295 #endif /* on missing GESVD */ 1296 /* Allocate optimal workspace */ 1297 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1298 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1299 } 1300 /* Now we can loop on constraining sets */ 1301 total_counts = 0; 1302 temp_indices[0] = 0; 1303 /* vertices */ 1304 if (ISForVertices) { 1305 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1306 if (nnsp_has_cnst) { /* consider all vertices */ 1307 for (i=0;i<n_vertices;i++) { 1308 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1309 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1310 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1311 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1312 total_counts++; 1313 } 1314 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1315 PetscBool used_vertex; 1316 for (i=0;i<n_vertices;i++) { 1317 used_vertex = PETSC_FALSE; 1318 k = 0; 1319 while (!used_vertex && k<nnsp_size) { 1320 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1321 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1322 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1323 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1324 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1325 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1326 total_counts++; 1327 used_vertex = PETSC_TRUE; 1328 } 1329 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1330 k++; 1331 } 1332 } 1333 } 1334 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1335 n_vertices = total_counts; 1336 } 1337 1338 /* edges and faces */ 1339 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1340 if (i<n_ISForEdges) { 1341 used_IS = &ISForEdges[i]; 1342 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1343 } else { 1344 used_IS = &ISForFaces[i-n_ISForEdges]; 1345 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1346 } 1347 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1348 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1349 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1350 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1351 /* change of basis should not be performed on local periodic nodes */ 1352 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1353 if (nnsp_has_cnst) { 1354 PetscScalar quad_value; 1355 temp_constraints++; 1356 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1357 for (j=0;j<size_of_constraint;j++) { 1358 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1359 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1360 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1361 } 1362 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1363 if (boolforchange) { 1364 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1365 } 1366 total_counts++; 1367 } 1368 for (k=0;k<nnsp_size;k++) { 1369 PetscReal real_value; 1370 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1371 for (j=0;j<size_of_constraint;j++) { 1372 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1373 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1374 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1375 } 1376 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1377 /* check if array is null on the connected component */ 1378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1379 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1380 if (real_value > 0.0) { /* keep indices and values */ 1381 temp_constraints++; 1382 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1383 if (boolforchange) { 1384 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1385 } 1386 total_counts++; 1387 } 1388 } 1389 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1390 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1391 if (!pcbddc->use_nnsp_true) { 1392 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1393 1394 #if defined(PETSC_MISSING_LAPACK_GESVD) 1395 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1396 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1397 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1398 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1399 from that computed using LAPACKgesvd 1400 -> This is due to a different computation of eigenvectors in LAPACKheev 1401 -> The quality of the POD-computed basis will be the same */ 1402 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1403 /* Store upper triangular part of correlation matrix */ 1404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1405 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1406 for (j=0;j<temp_constraints;j++) { 1407 for (k=0;k<j+1;k++) { 1408 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1409 } 1410 } 1411 /* compute eigenvalues and eigenvectors of correlation matrix */ 1412 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1413 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1414 #if !defined(PETSC_USE_COMPLEX) 1415 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1416 #else 1417 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1418 #endif 1419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1421 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1422 j=0; 1423 while (j < temp_constraints && singular_vals[j] < tol) j++; 1424 total_counts=total_counts-j; 1425 /* scale and copy POD basis into used quadrature memory */ 1426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1427 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1428 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1430 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1431 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1432 if (j<temp_constraints) { 1433 PetscInt ii; 1434 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1436 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1438 for (k=0;k<temp_constraints-j;k++) { 1439 for (ii=0;ii<size_of_constraint;ii++) { 1440 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1441 } 1442 } 1443 } 1444 #else /* on missing GESVD */ 1445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1446 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1448 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1449 #if !defined(PETSC_USE_COMPLEX) 1450 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1451 #else 1452 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1453 #endif 1454 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1456 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1457 k = temp_constraints; 1458 if (k > size_of_constraint) k = size_of_constraint; 1459 j = 0; 1460 while (j < k && singular_vals[k-j-1] < tol) j++; 1461 total_counts = total_counts-temp_constraints+k-j; 1462 #endif /* on missing GESVD */ 1463 } 1464 } 1465 /* free index sets of faces, edges and vertices */ 1466 for (i=0;i<n_ISForFaces;i++) { 1467 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1468 } 1469 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1470 for (i=0;i<n_ISForEdges;i++) { 1471 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1472 } 1473 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1474 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1475 1476 /* free workspace */ 1477 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1478 ierr = PetscFree(work);CHKERRQ(ierr); 1479 #if defined(PETSC_USE_COMPLEX) 1480 ierr = PetscFree(rwork);CHKERRQ(ierr); 1481 #endif 1482 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1483 #if defined(PETSC_MISSING_LAPACK_GESVD) 1484 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1485 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1486 #endif 1487 } 1488 for (k=0;k<nnsp_size;k++) { 1489 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1490 } 1491 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1492 1493 /* set quantities in pcbddc data structure */ 1494 /* n_vertices defines the number of subdomain corners in the primal space */ 1495 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1496 pcbddc->local_primal_size = total_counts; 1497 pcbddc->n_vertices = n_vertices; 1498 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1499 1500 /* Create constraint matrix */ 1501 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1502 /* so we need to set it up properly either with or without change of basis */ 1503 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1504 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1505 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1506 /* array to compute a local numbering of constraints : vertices first then constraints */ 1507 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1508 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1509 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1510 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1511 /* auxiliary stuff for basis change */ 1512 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1513 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1514 1515 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1516 total_primal_vertices=0; 1517 for (i=0;i<pcbddc->local_primal_size;i++) { 1518 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1519 if (size_of_constraint == 1) { 1520 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1521 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1522 aux_primal_minloc[total_primal_vertices]=0; 1523 total_primal_vertices++; 1524 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1525 PetscInt min_loc,min_index; 1526 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1527 /* find first untouched local node */ 1528 k = 0; 1529 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1530 min_index = global_indices[k]; 1531 min_loc = k; 1532 /* search the minimum among global nodes already untouched on the cc */ 1533 for (k=1;k<size_of_constraint;k++) { 1534 /* there can be more than one constraint on a single connected component */ 1535 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1536 min_index = global_indices[k]; 1537 min_loc = k; 1538 } 1539 } 1540 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1541 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1542 aux_primal_minloc[total_primal_vertices]=min_loc; 1543 total_primal_vertices++; 1544 } 1545 } 1546 /* free workspace */ 1547 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1548 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1549 /* permute indices in order to have a sorted set of vertices */ 1550 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1551 1552 /* nonzero structure of constraint matrix */ 1553 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1554 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1555 j=total_primal_vertices; 1556 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1557 if (!PetscBTLookup(change_basis,i)) { 1558 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1559 j++; 1560 } 1561 } 1562 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1563 ierr = PetscFree(nnz);CHKERRQ(ierr); 1564 /* set values in constraint matrix */ 1565 for (i=0;i<total_primal_vertices;i++) { 1566 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1567 } 1568 total_counts = total_primal_vertices; 1569 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1570 if (!PetscBTLookup(change_basis,i)) { 1571 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1572 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1573 total_counts++; 1574 } 1575 } 1576 /* assembling */ 1577 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1578 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1579 /* 1580 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1581 */ 1582 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1583 if (pcbddc->use_change_of_basis) { 1584 PetscBool qr_needed = PETSC_FALSE; 1585 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1586 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1587 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1588 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1589 /* work arrays */ 1590 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1591 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1592 /* nonzeros per row */ 1593 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1594 if (PetscBTLookup(change_basis,i)) { 1595 qr_needed = PETSC_TRUE; 1596 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1597 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1598 } 1599 } 1600 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1601 ierr = PetscFree(nnz);CHKERRQ(ierr); 1602 /* Set initial identity in the matrix */ 1603 for (i=0;i<pcis->n_B;i++) { 1604 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 1607 /* Now we loop on the constraints which need a change of basis */ 1608 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1609 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1610 /* Change of basis matrix T computed via QR decomposition of constraints */ 1611 if (qr_needed) { 1612 /* dual and primal dofs on a single cc */ 1613 PetscInt dual_dofs,primal_dofs; 1614 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1615 PetscInt primal_counter; 1616 /* working stuff for GEQRF */ 1617 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1618 PetscBLASInt lqr_work; 1619 /* working stuff for UNGQR */ 1620 PetscScalar *gqr_work,lgqr_work_t; 1621 PetscBLASInt lgqr_work; 1622 /* working stuff for TRTRS */ 1623 PetscScalar *trs_rhs; 1624 PetscBLASInt Blas_NRHS; 1625 /* pointers for values insertion into change of basis matrix */ 1626 PetscInt *start_rows,*start_cols; 1627 PetscScalar *start_vals; 1628 /* working stuff for values insertion */ 1629 PetscBT is_primal; 1630 1631 /* space to store Q */ 1632 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1633 /* first we issue queries for optimal work */ 1634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1635 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1637 lqr_work = -1; 1638 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1639 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1640 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1641 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1642 lgqr_work = -1; 1643 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1644 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1645 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1646 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1647 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1648 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1649 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1650 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1651 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1652 /* array to store scaling factors for reflectors */ 1653 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1654 /* array to store rhs and solution of triangular solver */ 1655 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1656 /* array to store whether a node is primal or not */ 1657 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1658 for (i=0;i<total_primal_vertices;i++) { 1659 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1660 } 1661 1662 /* allocating workspace for check */ 1663 if (pcbddc->dbg_flag) { 1664 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1666 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1667 } 1668 1669 /* loop on constraints and see whether or not they need a change of basis */ 1670 /* -> using implicit ordering contained in temp_indices data */ 1671 total_counts = pcbddc->n_vertices; 1672 primal_counter = total_counts; 1673 while (total_counts<pcbddc->local_primal_size) { 1674 primal_dofs = 1; 1675 if (PetscBTLookup(change_basis,total_counts)) { 1676 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1677 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1678 primal_dofs++; 1679 } 1680 /* get constraint info */ 1681 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1682 dual_dofs = size_of_constraint-primal_dofs; 1683 1684 /* copy quadrature constraints for change of basis check */ 1685 if (pcbddc->dbg_flag) { 1686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1687 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1688 } 1689 1690 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1691 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1692 1693 /* compute QR decomposition of constraints */ 1694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1695 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1696 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1697 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1698 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1699 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1700 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1701 1702 /* explictly compute R^-T */ 1703 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1704 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1705 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1706 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1709 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1710 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1712 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1713 1714 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1717 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1720 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1721 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1722 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1723 1724 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1725 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1726 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1727 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1728 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1731 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1734 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1736 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1737 1738 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1739 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1740 /* insert cols for primal dofs */ 1741 for (j=0;j<primal_dofs;j++) { 1742 start_vals = &qr_basis[j*size_of_constraint]; 1743 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1744 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1745 } 1746 /* insert cols for dual dofs */ 1747 for (j=0,k=0;j<dual_dofs;k++) { 1748 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1749 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1750 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1751 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1752 j++; 1753 } 1754 } 1755 1756 /* check change of basis */ 1757 if (pcbddc->dbg_flag) { 1758 PetscInt ii,jj; 1759 PetscBool valid_qr=PETSC_TRUE; 1760 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1762 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1765 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1766 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1767 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1769 for (jj=0;jj<size_of_constraint;jj++) { 1770 for (ii=0;ii<primal_dofs;ii++) { 1771 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1772 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1773 } 1774 } 1775 if (!valid_qr) { 1776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1777 for (jj=0;jj<size_of_constraint;jj++) { 1778 for (ii=0;ii<primal_dofs;ii++) { 1779 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1780 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1781 } 1782 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1783 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1784 } 1785 } 1786 } 1787 } else { 1788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1789 } 1790 } 1791 /* increment primal counter */ 1792 primal_counter += primal_dofs; 1793 } else { 1794 if (pcbddc->dbg_flag) { 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1796 } 1797 } 1798 /* increment constraint counter total_counts */ 1799 total_counts += primal_dofs; 1800 } 1801 if (pcbddc->dbg_flag) { 1802 ierr = PetscFree(work);CHKERRQ(ierr); 1803 } 1804 /* free workspace */ 1805 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1806 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1807 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1808 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1809 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1810 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1811 } 1812 /* assembling */ 1813 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1814 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1815 /* 1816 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1817 */ 1818 } 1819 if (pcbddc->dbg_flag) { 1820 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1821 } 1822 /* free workspace */ 1823 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1824 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1825 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1826 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1827 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1828 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1829 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1830 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1836 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1837 { 1838 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1839 PC_IS *pcis = (PC_IS*)pc->data; 1840 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1841 PetscInt bs,ierr,i,vertex_size; 1842 PetscViewer viewer=pcbddc->dbg_viewer; 1843 1844 PetscFunctionBegin; 1845 /* Init local Graph struct */ 1846 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1847 1848 /* Check validity of the csr graph passed in by the user */ 1849 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1850 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1851 } 1852 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1853 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1854 Mat mat_adj; 1855 const PetscInt *xadj,*adjncy; 1856 PetscBool flg_row=PETSC_TRUE; 1857 1858 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1859 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1860 if (!flg_row) { 1861 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1862 } 1863 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1864 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1865 if (!flg_row) { 1866 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1867 } 1868 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1869 } 1870 1871 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1872 vertex_size = 1; 1873 if (!pcbddc->n_ISForDofs) { 1874 IS *custom_ISForDofs; 1875 1876 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1877 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1878 for (i=0;i<bs;i++) { 1879 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1880 } 1881 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1882 /* remove my references to IS objects */ 1883 for (i=0;i<bs;i++) { 1884 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1885 } 1886 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1887 } else { /* mat block size as vertex size (used for elasticity) */ 1888 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1889 } 1890 1891 /* Setup of Graph */ 1892 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1893 1894 /* Graph's connected components analysis */ 1895 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1896 1897 /* print some info to stdout */ 1898 if (pcbddc->dbg_flag) { 1899 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1906 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1907 { 1908 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1909 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1910 PetscErrorCode ierr; 1911 1912 PetscFunctionBegin; 1913 n = 0; 1914 vertices = 0; 1915 if (pcbddc->ConstraintMatrix) { 1916 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1917 for (i=0;i<local_primal_size;i++) { 1918 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1919 if (size_of_constraint == 1) n++; 1920 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1921 } 1922 if (vertices_idx) { 1923 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1924 n = 0; 1925 for (i=0;i<local_primal_size;i++) { 1926 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1927 if (size_of_constraint == 1) { 1928 vertices[n++]=row_cmat_indices[0]; 1929 } 1930 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1931 } 1932 } 1933 } 1934 *n_vertices = n; 1935 if (vertices_idx) *vertices_idx = vertices; 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1941 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1942 { 1943 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1944 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1945 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1946 PetscBT touched; 1947 PetscErrorCode ierr; 1948 1949 /* This function assumes that the number of local constraints per connected component 1950 is not greater than the number of nodes defined for the connected component 1951 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1952 PetscFunctionBegin; 1953 n = 0; 1954 constraints_index = 0; 1955 if (pcbddc->ConstraintMatrix) { 1956 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1957 max_size_of_constraint = 0; 1958 for (i=0;i<local_primal_size;i++) { 1959 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1960 if (size_of_constraint > 1) { 1961 n++; 1962 } 1963 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1964 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1965 } 1966 if (constraints_idx) { 1967 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1968 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1969 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1970 n = 0; 1971 for (i=0;i<local_primal_size;i++) { 1972 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1973 if (size_of_constraint > 1) { 1974 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1975 /* find first untouched local node */ 1976 j = 0; 1977 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1978 min_index = row_cmat_global_indices[j]; 1979 min_loc = j; 1980 /* search the minimum among nodes not yet touched on the connected component 1981 since there can be more than one constraint on a single cc */ 1982 for (j=1;j<size_of_constraint;j++) { 1983 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1984 min_index = row_cmat_global_indices[j]; 1985 min_loc = j; 1986 } 1987 } 1988 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1989 constraints_index[n++] = row_cmat_indices[min_loc]; 1990 } 1991 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1992 } 1993 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1994 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1995 } 1996 } 1997 *n_constraints = n; 1998 if (constraints_idx) *constraints_idx = constraints_index; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /* the next two functions has been adapted from pcis.c */ 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "PCBDDCApplySchur" 2005 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2006 { 2007 PetscErrorCode ierr; 2008 PC_IS *pcis = (PC_IS*)(pc->data); 2009 2010 PetscFunctionBegin; 2011 if (!vec2_B) { vec2_B = v; } 2012 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2013 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2014 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2015 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2016 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2022 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2023 { 2024 PetscErrorCode ierr; 2025 PC_IS *pcis = (PC_IS*)(pc->data); 2026 2027 PetscFunctionBegin; 2028 if (!vec2_B) { vec2_B = v; } 2029 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2030 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2031 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2032 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2033 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "PCBDDCSubsetNumbering" 2039 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2040 { 2041 Vec local_vec,global_vec; 2042 IS seqis,paris; 2043 VecScatter scatter_ctx; 2044 PetscScalar *array; 2045 PetscInt *temp_global_dofs; 2046 PetscScalar globalsum; 2047 PetscInt i,j,s; 2048 PetscInt nlocals,first_index,old_index,max_local; 2049 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2050 PetscMPIInt *dof_sizes,*dof_displs; 2051 PetscBool first_found; 2052 PetscErrorCode ierr; 2053 2054 PetscFunctionBegin; 2055 /* mpi buffers */ 2056 MPI_Comm_size(comm,&size_prec_comm); 2057 MPI_Comm_rank(comm,&rank_prec_comm); 2058 j = ( !rank_prec_comm ? size_prec_comm : 0); 2059 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2060 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2061 /* get maximum size of subset */ 2062 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2063 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2064 max_local = 0; 2065 if (n_local_dofs) { 2066 max_local = temp_global_dofs[0]; 2067 for (i=1;i<n_local_dofs;i++) { 2068 if (max_local < temp_global_dofs[i] ) { 2069 max_local = temp_global_dofs[i]; 2070 } 2071 } 2072 } 2073 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2074 max_global++; 2075 max_local = 0; 2076 if (n_local_dofs) { 2077 max_local = local_dofs[0]; 2078 for (i=1;i<n_local_dofs;i++) { 2079 if (max_local < local_dofs[i] ) { 2080 max_local = local_dofs[i]; 2081 } 2082 } 2083 } 2084 max_local++; 2085 /* allocate workspace */ 2086 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2087 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2088 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2089 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2090 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2091 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2092 /* create scatter */ 2093 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2094 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2095 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2096 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2097 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2098 /* init array */ 2099 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2100 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2101 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2102 if (local_dofs_mult) { 2103 for (i=0;i<n_local_dofs;i++) { 2104 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2105 } 2106 } else { 2107 for (i=0;i<n_local_dofs;i++) { 2108 array[local_dofs[i]]=1.0; 2109 } 2110 } 2111 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2112 /* scatter into global vec and get total number of global dofs */ 2113 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2114 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2116 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2117 /* Fill global_vec with cumulative function for global numbering */ 2118 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2119 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2120 nlocals = 0; 2121 first_index = -1; 2122 first_found = PETSC_FALSE; 2123 for (i=0;i<s;i++) { 2124 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2125 first_found = PETSC_TRUE; 2126 first_index = i; 2127 } 2128 nlocals += (PetscInt)PetscRealPart(array[i]); 2129 } 2130 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2131 if (!rank_prec_comm) { 2132 dof_displs[0]=0; 2133 for (i=1;i<size_prec_comm;i++) { 2134 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2135 } 2136 } 2137 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2138 if (first_found) { 2139 array[first_index] += (PetscScalar)nlocals; 2140 old_index = first_index; 2141 for (i=first_index+1;i<s;i++) { 2142 if (PetscRealPart(array[i]) > 0.0) { 2143 array[i] += array[old_index]; 2144 old_index = i; 2145 } 2146 } 2147 } 2148 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2149 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2150 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2151 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2152 /* get global ordering of local dofs */ 2153 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2154 if (local_dofs_mult) { 2155 for (i=0;i<n_local_dofs;i++) { 2156 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2157 } 2158 } else { 2159 for (i=0;i<n_local_dofs;i++) { 2160 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2161 } 2162 } 2163 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2164 /* free workspace */ 2165 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2166 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2167 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2168 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2169 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2170 /* return pointer to global ordering of local dofs */ 2171 *global_numbering_subset = temp_global_dofs; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2177 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2178 { 2179 PetscInt i,j; 2180 PetscScalar *alphas; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 /* this implements stabilized Gram-Schmidt */ 2185 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2186 for (i=0;i<n;i++) { 2187 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2188 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2189 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2190 } 2191 ierr = PetscFree(alphas);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatConvert_IS_AIJ" 2198 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2199 { 2200 Mat new_mat; 2201 Mat_IS *matis = (Mat_IS*)(mat->data); 2202 /* info on mat */ 2203 PetscInt bs,rows,cols; 2204 PetscInt lrows,lcols; 2205 PetscInt local_rows,local_cols; 2206 PetscMPIInt nsubdomains; 2207 /* preallocation */ 2208 Vec vec_dnz,vec_onz; 2209 PetscScalar *my_dnz,*my_onz,*array; 2210 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2211 PetscInt index_row,index_col,owner; 2212 PetscInt *local_indices,*global_indices; 2213 /* work */ 2214 PetscInt i,j; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 /* CHECKS */ 2219 /* get info from mat */ 2220 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2221 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2222 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2223 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2224 2225 /* MAT_INITIAL_MATRIX */ 2226 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2227 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2228 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2229 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2230 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2231 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2232 2233 /* 2234 preallocation 2235 */ 2236 2237 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2238 /* 2239 Some vectors are needed to sum up properly on shared interface dofs. 2240 Note that preallocation is not exact, since it overestimates nonzeros 2241 */ 2242 /* 2243 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2244 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2245 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2246 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2247 */ 2248 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2249 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2250 /* All processes need to compute entire row ownership */ 2251 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2252 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2253 for (i=0;i<nsubdomains;i++) { 2254 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2255 row_ownership[j]=i; 2256 } 2257 } 2258 /* map indices of local mat to global values */ 2259 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2260 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2261 for (i=0;i<local_rows;i++) local_indices[i]=i; 2262 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2263 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2264 2265 /* 2266 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2267 then, they will be summed up properly. This way, preallocation is always sufficient 2268 */ 2269 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2270 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2271 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2272 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2273 for (i=0;i<local_rows;i++) { 2274 index_row = global_indices[i]; 2275 for (j=i;j<local_rows;j++) { 2276 owner = row_ownership[index_row]; 2277 index_col = global_indices[j]; 2278 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2279 my_dnz[i] += 1.0; 2280 } else { /* offdiag block */ 2281 my_onz[i] += 1.0; 2282 } 2283 /* same as before, interchanging rows and cols */ 2284 if (i != j) { 2285 owner = row_ownership[index_col]; 2286 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2287 my_dnz[j] += 1.0; 2288 } else { 2289 my_onz[j] += 1.0; 2290 } 2291 } 2292 } 2293 } 2294 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2295 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2296 if (local_rows) { /* multilevel guard */ 2297 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2298 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2299 } 2300 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2301 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2302 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2303 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2304 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2305 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2306 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2307 2308 /* set computed preallocation in dnz and onz */ 2309 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2310 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2311 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2312 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2313 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2314 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2315 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2316 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2317 2318 /* Resize preallocation if overestimated */ 2319 for (i=0;i<lrows;i++) { 2320 dnz[i] = PetscMin(dnz[i],lcols); 2321 onz[i] = PetscMin(onz[i],cols-lcols); 2322 } 2323 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2324 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2325 2326 /* 2327 Set values. Very Basic. 2328 */ 2329 for (i=0;i<local_rows;i++) { 2330 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2331 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2332 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2333 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2334 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2335 } 2336 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2337 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2338 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 2340 /* get back mat */ 2341 *M = new_mat; 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatISSubassemble_Private" 2347 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2348 { 2349 Mat subdomain_adj; 2350 IS new_ranks,ranks_send_to; 2351 MatPartitioning partitioner; 2352 Mat_IS *matis; 2353 PetscInt n_neighs,*neighs,*n_shared,**shared; 2354 PetscInt prank; 2355 PetscMPIInt size,rank,color; 2356 PetscInt *xadj,*adjncy,*oldranks; 2357 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2358 PetscInt i,j,n_subdomains,local_size,threshold=0; 2359 PetscErrorCode ierr; 2360 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2361 PetscSubcomm subcomm; 2362 2363 PetscFunctionBegin; 2364 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2365 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2366 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2367 2368 /* Get info on mapping */ 2369 matis = (Mat_IS*)(mat->data); 2370 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2371 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2372 2373 /* build local CSR graph of subdomains' connectivity */ 2374 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2375 xadj[0] = 0; 2376 xadj[1] = PetscMax(n_neighs-1,0); 2377 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2378 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2379 2380 if (threshold) { 2381 PetscInt* count,min_threshold; 2382 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2383 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2384 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2385 for (j=0;j<n_shared[i];j++) { 2386 count[shared[i][j]] += 1; 2387 } 2388 } 2389 /* adapt threshold since we dont want to lose significant connections */ 2390 min_threshold = n_neighs; 2391 for (i=1;i<n_neighs;i++) { 2392 for (j=0;j<n_shared[i];j++) { 2393 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2394 } 2395 } 2396 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2397 threshold = PetscMax(min_threshold+1,threshold); 2398 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2399 xadj[1] = 0; 2400 for (i=1;i<n_neighs;i++) { 2401 for (j=0;j<n_shared[i];j++) { 2402 if (count[shared[i][j]] < threshold) { 2403 adjncy[xadj[1]] = neighs[i]; 2404 adjncy_wgt[xadj[1]] = n_shared[i]; 2405 xadj[1]++; 2406 break; 2407 } 2408 } 2409 } 2410 ierr = PetscFree(count);CHKERRQ(ierr); 2411 } else { 2412 if (xadj[1]) { 2413 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2414 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2415 } 2416 } 2417 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2418 if (use_square) { 2419 for (i=0;i<xadj[1];i++) { 2420 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2421 } 2422 } 2423 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2424 2425 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2426 2427 /* 2428 Restrict work on active processes only. 2429 */ 2430 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2431 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2432 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2433 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2434 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2435 if (color) { 2436 ierr = PetscFree(xadj);CHKERRQ(ierr); 2437 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2438 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2439 } else { 2440 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2441 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2442 prank = rank; 2443 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2444 for (i=0;i<size;i++) { 2445 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2446 } 2447 for (i=0;i<xadj[1];i++) { 2448 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2449 } 2450 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2451 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2452 n_subdomains = (PetscInt)size; 2453 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2454 2455 /* Partition */ 2456 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2457 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2458 if (use_vwgt) { 2459 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2460 v_wgt[0] = local_size; 2461 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2462 } 2463 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2464 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2465 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2466 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2467 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2468 2469 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2470 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2471 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2472 /* clean up */ 2473 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2474 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2475 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2476 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2477 } 2478 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2479 2480 /* assemble parallel IS for sends */ 2481 i = 1; 2482 if (color) i=0; 2483 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2484 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2485 2486 /* get back IS */ 2487 *is_sends = ranks_send_to; 2488 PetscFunctionReturn(0); 2489 } 2490 2491 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatISSubassemble" 2495 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2496 { 2497 Mat local_mat,new_mat; 2498 Mat_IS *matis; 2499 IS is_sends_internal; 2500 PetscInt rows,cols; 2501 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2502 PetscBool ismatis,isdense; 2503 ISLocalToGlobalMapping l2gmap; 2504 PetscInt* l2gmap_indices; 2505 const PetscInt* is_indices; 2506 MatType new_local_type; 2507 MatTypePrivate new_local_type_private; 2508 /* buffers */ 2509 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2510 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2511 /* MPI */ 2512 MPI_Comm comm; 2513 PetscMPIInt n_sends,n_recvs,commsize; 2514 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2515 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2516 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2517 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 /* checks */ 2522 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2523 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2524 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2525 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2526 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2527 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2528 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2529 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2530 PetscValidLogicalCollectiveInt(mat,bs,0); 2531 /* prepare IS for sending if not provided */ 2532 if (!is_sends) { 2533 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2534 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2535 } else { 2536 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2537 is_sends_internal = is_sends; 2538 } 2539 2540 /* get pointer of MATIS data */ 2541 matis = (Mat_IS*)mat->data; 2542 2543 /* get comm */ 2544 comm = PetscObjectComm((PetscObject)mat); 2545 2546 /* compute number of sends */ 2547 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2548 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2549 2550 /* compute number of receives */ 2551 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2552 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2553 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2554 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2555 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2556 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2557 ierr = PetscFree(iflags);CHKERRQ(ierr); 2558 2559 /* prepare send/receive buffers */ 2560 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2561 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2562 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2563 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2564 2565 /* Get data from local mat */ 2566 if (!isdense) { 2567 /* TODO: See below some guidelines on how to prepare the local buffers */ 2568 /* 2569 send_buffer_vals should contain the raw values of the local matrix 2570 send_buffer_idxs should contain: 2571 - MatType_PRIVATE type 2572 - PetscInt size_of_l2gmap 2573 - PetscInt global_row_indices[size_of_l2gmap] 2574 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2575 */ 2576 } else { 2577 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2578 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2579 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2580 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2581 send_buffer_idxs[1] = i; 2582 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2583 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2584 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2585 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2586 for (i=0;i<n_sends;i++) { 2587 ilengths_vals[is_indices[i]] = len*len; 2588 ilengths_idxs[is_indices[i]] = len+2; 2589 } 2590 } 2591 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2592 buf_size_idxs = 0; 2593 buf_size_vals = 0; 2594 for (i=0;i<n_recvs;i++) { 2595 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2596 buf_size_vals += (PetscInt)olengths_vals[i]; 2597 } 2598 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2599 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2600 2601 /* get new tags for clean communications */ 2602 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2603 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2604 2605 /* allocate for requests */ 2606 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2607 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2608 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2609 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2610 2611 /* communications */ 2612 ptr_idxs = recv_buffer_idxs; 2613 ptr_vals = recv_buffer_vals; 2614 for (i=0;i<n_recvs;i++) { 2615 source_dest = onodes[i]; 2616 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2617 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2618 ptr_idxs += olengths_idxs[i]; 2619 ptr_vals += olengths_vals[i]; 2620 } 2621 for (i=0;i<n_sends;i++) { 2622 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2623 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2624 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2625 } 2626 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2627 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2628 2629 /* assemble new l2g map */ 2630 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2631 ptr_idxs = recv_buffer_idxs; 2632 buf_size_idxs = 0; 2633 for (i=0;i<n_recvs;i++) { 2634 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2635 ptr_idxs += olengths_idxs[i]; 2636 } 2637 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2638 ptr_idxs = recv_buffer_idxs; 2639 buf_size_idxs = 0; 2640 for (i=0;i<n_recvs;i++) { 2641 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2642 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2643 ptr_idxs += olengths_idxs[i]; 2644 } 2645 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2646 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2647 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2648 2649 /* infer new local matrix type from received local matrices type */ 2650 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2651 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2652 new_local_type_private = MATAIJ_PRIVATE; 2653 new_local_type = MATSEQAIJ; 2654 if (n_recvs) { 2655 new_local_type_private = send_buffer_idxs[0]; 2656 ptr_idxs = recv_buffer_idxs; 2657 for (i=0;i<n_recvs;i++) { 2658 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2659 new_local_type_private = MATAIJ_PRIVATE; 2660 break; 2661 } 2662 ptr_idxs += olengths_idxs[i]; 2663 } 2664 switch (new_local_type_private) { 2665 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2666 new_local_type = MATSEQAIJ; 2667 bs = 1; 2668 break; 2669 case MATAIJ_PRIVATE: 2670 new_local_type = MATSEQAIJ; 2671 bs = 1; 2672 break; 2673 case MATBAIJ_PRIVATE: 2674 new_local_type = MATSEQBAIJ; 2675 break; 2676 case MATSBAIJ_PRIVATE: 2677 new_local_type = MATSEQSBAIJ; 2678 break; 2679 default: 2680 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2681 break; 2682 } 2683 } 2684 2685 /* create MATIS object */ 2686 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2687 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2688 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2689 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2690 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2691 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2692 2693 /* set values */ 2694 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2695 ptr_vals = recv_buffer_vals; 2696 ptr_idxs = recv_buffer_idxs; 2697 for (i=0;i<n_recvs;i++) { 2698 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2699 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2700 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2701 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2702 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2703 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2704 } 2705 ptr_idxs += olengths_idxs[i]; 2706 ptr_vals += olengths_vals[i]; 2707 } 2708 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2709 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 2713 { /* check */ 2714 Vec lvec,rvec; 2715 PetscReal infty_error; 2716 2717 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2718 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2719 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2720 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2721 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2722 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2723 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2724 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2725 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2726 } 2727 2728 /* free workspace */ 2729 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2730 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2731 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2732 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2733 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2734 if (isdense) { 2735 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2736 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2737 } else { 2738 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2739 } 2740 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2741 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2742 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2743 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2744 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2745 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2746 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2747 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2748 ierr = PetscFree(onodes);CHKERRQ(ierr); 2749 /* get back new mat */ 2750 *mat_n = new_mat; 2751 PetscFunctionReturn(0); 2752 } 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2756 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2757 { 2758 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2759 PC_IS *pcis = (PC_IS*)pc->data; 2760 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2761 MatNullSpace CoarseNullSpace=NULL; 2762 ISLocalToGlobalMapping coarse_islg; 2763 IS coarse_is; 2764 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2765 PetscInt im_active=-1,active_procs=-1; 2766 PC pc_temp; 2767 PCType coarse_pc_type; 2768 KSPType coarse_ksp_type; 2769 PetscBool multilevel_requested,multilevel_allowed; 2770 PetscBool setsym,issym,isbddc,isnn; 2771 MatStructure matstruct; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 /* Assign global numbering to coarse dofs */ 2776 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2777 2778 /* infer some info from user */ 2779 issym = PETSC_FALSE; 2780 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2781 multilevel_allowed = PETSC_FALSE; 2782 multilevel_requested = PETSC_FALSE; 2783 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2784 if (multilevel_requested) { 2785 /* count "active processes" */ 2786 im_active = !!(pcis->n); 2787 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2788 if (active_procs/pcbddc->coarsening_ratio < 2) { 2789 multilevel_allowed = PETSC_FALSE; 2790 } else { 2791 multilevel_allowed = PETSC_TRUE; 2792 } 2793 } 2794 2795 /* set defaults for coarse KSP and PC */ 2796 if (multilevel_allowed) { 2797 if (issym) { 2798 coarse_ksp_type = KSPRICHARDSON; 2799 } else { 2800 coarse_ksp_type = KSPCHEBYSHEV; 2801 } 2802 coarse_pc_type = PCBDDC; 2803 } else { 2804 coarse_ksp_type = KSPPREONLY; 2805 coarse_pc_type = PCREDUNDANT; 2806 } 2807 2808 /* create the coarse KSP object only once with defaults */ 2809 if (!pcbddc->coarse_ksp) { 2810 char prefix[256],str_level[3]; 2811 size_t len; 2812 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2813 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2814 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2815 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2816 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2817 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2818 /* prefix */ 2819 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2820 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2821 if (!pcbddc->current_level) { 2822 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2823 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2824 } else { 2825 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2826 if (pcbddc->current_level>1) len -= 2; 2827 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2828 *(prefix+len)='\0'; 2829 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2830 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 2831 } 2832 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 2833 } 2834 /* allow user customization */ 2835 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2836 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2837 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2838 2839 /* get some info after set from options */ 2840 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2841 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2842 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2843 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 2844 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2845 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2846 isbddc = PETSC_FALSE; 2847 } 2848 2849 /* propagate BDDC info to the next level */ 2850 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2851 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2852 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2853 2854 /* creates temporary MATIS object for coarse matrix */ 2855 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2856 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2858 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2859 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2860 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2863 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2864 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2865 2866 /* assemble coarse matrix */ 2867 if (isbddc || isnn) { 2868 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2869 } else { 2870 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2871 } 2872 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2873 2874 /* create local to global scatters for coarse problem */ 2875 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2876 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2877 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2878 2879 /* propagate symmetry info to coarse matrix */ 2880 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2881 2882 /* Compute coarse null space (special handling by BDDC only) */ 2883 if (pcbddc->NullSpace) { 2884 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2885 if (isbddc) { 2886 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2887 } else { 2888 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2889 } 2890 } 2891 2892 /* set operators */ 2893 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 2894 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2895 2896 /* additional KSP customization */ 2897 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2898 if (max_it < 5) { 2899 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2900 } 2901 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 2902 2903 2904 /* print some info if requested */ 2905 if (pcbddc->dbg_flag) { 2906 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 2907 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 2908 if (!multilevel_allowed) { 2909 if (multilevel_requested) { 2910 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2911 } else if (pcbddc->max_levels) { 2912 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 2913 } 2914 } 2915 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 2916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2917 } 2918 2919 /* setup coarse ksp */ 2920 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2921 if (pcbddc->dbg_flag) { 2922 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 2923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2924 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 2925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2926 } 2927 2928 /* Check coarse problem if requested */ 2929 if (pcbddc->dbg_flag) { 2930 KSP check_ksp; 2931 KSPType check_ksp_type; 2932 PC check_pc; 2933 Vec check_vec; 2934 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 2935 PetscInt its; 2936 PetscBool ispreonly,compute; 2937 2938 /* Create ksp object suitable for estimation of extreme eigenvalues */ 2939 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 2940 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2941 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 2942 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 2943 if (ispreonly) { 2944 check_ksp_type = KSPPREONLY; 2945 compute = PETSC_FALSE; 2946 } else { 2947 if (issym) check_ksp_type = KSPCG; 2948 else check_ksp_type = KSPGMRES; 2949 compute = PETSC_TRUE; 2950 } 2951 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 2952 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 2953 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 2954 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 2955 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 2956 /* create random vec */ 2957 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 2958 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 2959 if (CoarseNullSpace) { 2960 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 2961 } 2962 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2963 /* solve coarse problem */ 2964 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2965 if (CoarseNullSpace) { 2966 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 2967 } 2968 /* check coarse problem residual error */ 2969 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 2970 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2971 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2972 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 2973 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 2974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 2975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 2976 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 2977 /* get eigenvalue estimation if preonly has not been requested */ 2978 if (!ispreonly) { 2979 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2980 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 2981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 2982 } 2983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2984 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 2985 } 2986 /* free memory */ 2987 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 2988 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 2994 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 2995 { 2996 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2997 PC_IS* pcis = (PC_IS*)pc->data; 2998 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2999 PetscInt i,j,coarse_size; 3000 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3001 PetscErrorCode ierr; 3002 3003 PetscFunctionBegin; 3004 /* get indices in local ordering for vertices and constraints */ 3005 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3006 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3007 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3008 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3009 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3010 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3011 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3012 3013 /* Compute global number of coarse dofs */ 3014 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3015 3016 /* check numbering */ 3017 if (pcbddc->dbg_flag) { 3018 PetscScalar coarsesum,*array; 3019 3020 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3023 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3024 for (i=0;i<pcbddc->local_primal_size;i++) { 3025 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3026 } 3027 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3028 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3029 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3030 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3031 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3032 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3033 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3034 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3035 for (i=0;i<pcis->n;i++) { 3036 if (array[i] == 1.0) { 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3038 } 3039 } 3040 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3041 for (i=0;i<pcis->n;i++) { 3042 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3043 } 3044 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3045 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3046 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3047 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3048 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3050 if (pcbddc->dbg_flag > 1) { 3051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3052 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3054 for (i=0;i<pcbddc->local_primal_size;i++) { 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3056 } 3057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3058 } 3059 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3060 } 3061 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3062 /* get back data */ 3063 *coarse_size_n = coarse_size; 3064 *local_primal_indices_n = local_primal_indices; 3065 PetscFunctionReturn(0); 3066 } 3067 3068