1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 126 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 127 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 128 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 129 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 130 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 131 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 132 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 133 if (n_constraints) { 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 144 { 145 PetscErrorCode ierr; 146 /* pointers to pcis and pcbddc */ 147 PC_IS* pcis = (PC_IS*)pc->data; 148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 149 /* submatrices of local problem */ 150 Mat A_RV,A_VR,A_VV; 151 /* working matrices */ 152 Mat M1,M2,M3,C_CR; 153 /* working vectors */ 154 Vec vec1_C,vec2_C,vec1_V,vec2_V; 155 /* additional working stuff */ 156 IS is_aux; 157 ISLocalToGlobalMapping BtoNmap; 158 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 159 const PetscScalar *array,*row_cmat_values; 160 const PetscInt *row_cmat_indices,*idx_R_local; 161 PetscInt *vertices,*idx_V_B,*auxindices; 162 PetscInt n_vertices,n_constraints,size_of_constraint; 163 PetscInt i,j,n_R,n_D,n_B; 164 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 165 /* Vector and matrix types */ 166 VecType impVecType; 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 184 185 /* Allocating some extra storage just to be safe */ 186 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 187 for (i=0;i<pcis->n;i++) auxindices[i]=i; 188 189 /* vertices in boundary numbering */ 190 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 191 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 192 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 194 if (i != n_vertices) { 195 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 196 } 197 198 /* some work vectors on vertices and/or constraints */ 199 if (n_vertices) { 200 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 201 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 202 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 203 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 204 } 205 if (n_constraints) { 206 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 208 } 209 210 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 211 if (n_constraints) { 212 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 213 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 214 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 215 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 216 217 /* Extract constraints on R nodes: C_{CR} */ 218 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 220 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 221 222 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 223 for (i=0;i<n_constraints;i++) { 224 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 225 /* Get row of constraint matrix in R numbering */ 226 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 227 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 229 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 230 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 231 /* Solve for row of constraint matrix in R numbering */ 232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 233 /* Set values in local_auxmat2 */ 234 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 235 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 237 } 238 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 241 242 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 243 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 244 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 245 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 246 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 247 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(M1);CHKERRQ(ierr); 249 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 250 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 251 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 252 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 254 ierr = MatDestroy(&M2);CHKERRQ(ierr); 255 ierr = MatDestroy(&M3);CHKERRQ(ierr); 256 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 257 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 258 } 259 260 /* Get submatrices from subdomain matrix */ 261 if (n_vertices) { 262 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 266 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 267 } 268 269 /* Matrix of coarse basis functions (local) */ 270 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 271 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 272 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 273 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 274 if (pcbddc->switch_static || pcbddc->dbg_flag) { 275 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 276 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 277 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 279 } 280 281 if (pcbddc->dbg_flag) { 282 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 283 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 284 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 285 } 286 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 287 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 288 289 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 290 291 /* vertices */ 292 for (i=0;i<n_vertices;i++) { 293 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 294 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 295 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 296 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 297 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 298 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 299 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 300 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 301 if (n_constraints) { 302 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 303 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 304 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 305 } 306 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 307 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 308 309 /* Set values in coarse basis function and subdomain part of coarse_mat */ 310 /* coarse basis functions */ 311 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 312 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 313 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 314 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 315 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 317 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 318 if (pcbddc->switch_static || pcbddc->dbg_flag) { 319 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 320 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 321 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 322 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 323 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 324 } 325 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 326 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 327 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 328 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 329 if (n_constraints) { 330 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 331 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 332 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 333 } 334 335 /* check */ 336 if (pcbddc->dbg_flag) { 337 /* assemble subdomain vector on local nodes */ 338 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 339 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 340 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 342 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 343 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 344 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 345 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 346 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 356 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 357 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 358 /* check saddle point solution */ 359 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 360 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 361 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 362 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 363 /* shift by the identity matrix */ 364 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 365 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 366 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 367 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 368 } 369 } 370 371 /* constraints */ 372 for (i=0;i<n_constraints;i++) { 373 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 374 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 376 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 377 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 378 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 379 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 381 if (n_vertices) { 382 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 383 } 384 /* Set values in coarse basis function and subdomain part of coarse_mat */ 385 /* coarse basis functions */ 386 j = i+n_vertices; /* don't touch this! */ 387 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 388 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 389 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 390 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 391 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 392 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 393 if (pcbddc->switch_static || pcbddc->dbg_flag) { 394 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 399 } 400 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 401 if (n_vertices) { 402 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 403 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 405 } 406 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 407 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 409 410 if (pcbddc->dbg_flag) { 411 /* assemble subdomain vector on nodes */ 412 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 414 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 416 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 417 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 418 /* assemble subdomain vector of lagrange multipliers */ 419 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 420 if (n_vertices) { 421 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 } 425 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 430 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 431 /* check saddle point solution */ 432 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 433 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 434 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 435 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 436 /* shift by the identity matrix */ 437 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 441 } 442 } 443 /* call assembling routines for local coarse basis */ 444 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 445 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 447 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 448 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 449 } 450 451 /* compute other basis functions for non-symmetric problems */ 452 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 453 if (!setsym || (setsym && !issym)) { 454 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 455 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 456 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 457 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 458 if (pcbddc->switch_static || pcbddc->dbg_flag) { 459 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 460 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 461 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 462 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 463 } 464 for (i=0;i<pcbddc->local_primal_size;i++) { 465 if (n_constraints) { 466 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 467 for (j=0;j<n_constraints;j++) { 468 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 469 } 470 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 471 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 472 } 473 if (i<n_vertices) { 474 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 475 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 477 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 478 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 479 if (n_constraints) { 480 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 481 } 482 } else { 483 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 484 } 485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 486 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 487 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 490 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 492 if (i<n_vertices) { 493 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 494 } 495 if (pcbddc->switch_static || pcbddc->dbg_flag) { 496 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 498 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 499 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 500 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 501 } 502 503 if (pcbddc->dbg_flag) { 504 /* assemble subdomain vector on nodes */ 505 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 506 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 509 if (i<n_vertices) { 510 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 511 } 512 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 513 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 514 /* assemble subdomain vector of lagrange multipliers */ 515 for (j=0;j<pcbddc->local_primal_size;j++) { 516 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 517 } 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 /* check saddle point solution */ 521 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 522 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 524 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 525 /* shift by the identity matrix */ 526 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 527 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 528 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 530 } 531 } 532 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 535 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 } 538 } 539 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 540 /* Checking coarse_sub_mat and coarse basis functios */ 541 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 542 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 543 if (pcbddc->dbg_flag) { 544 Mat coarse_sub_mat; 545 Mat AUXMAT,TM1,TM2,TM3,TM4; 546 Mat coarse_phi_D,coarse_phi_B; 547 Mat coarse_psi_D,coarse_psi_B; 548 Mat A_II,A_BB,A_IB,A_BI; 549 MatType checkmattype=MATSEQAIJ; 550 PetscReal real_value; 551 552 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 553 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 554 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 555 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 556 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 557 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 558 if (pcbddc->coarse_psi_B) { 559 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 560 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 561 } 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 563 564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 567 if (pcbddc->coarse_psi_B) { 568 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 569 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 570 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 571 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 572 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 573 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 574 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 575 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 576 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 577 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 578 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 579 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 580 } else { 581 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 582 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 583 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 584 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 585 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 586 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 587 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 588 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 589 } 590 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 591 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 592 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 593 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 594 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 595 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 600 for (i=0;i<pcbddc->local_primal_size;i++) { 601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 602 } 603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 604 for (i=0;i<pcbddc->local_primal_size;i++) { 605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 606 } 607 if (pcbddc->coarse_psi_B) { 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 609 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 611 } 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 613 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 615 } 616 } 617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 618 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 619 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 620 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 621 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 622 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 623 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 624 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 625 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 626 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 627 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 628 if (pcbddc->coarse_psi_B) { 629 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 630 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 631 } 632 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 633 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 634 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 635 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 636 } 637 /* free memory */ 638 if (n_vertices) { 639 ierr = PetscFree(vertices);CHKERRQ(ierr); 640 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 641 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 643 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 645 } 646 if (n_constraints) { 647 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 648 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 651 } 652 ierr = PetscFree(auxindices);CHKERRQ(ierr); 653 /* get back data */ 654 *coarse_submat_vals_n = coarse_submat_vals; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNCT__ 659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 661 { 662 PC_IS* pcis = (PC_IS*)(pc->data); 663 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 664 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 665 /* manage repeated solves */ 666 MatReuse reuse; 667 MatStructure matstruct; 668 PetscErrorCode ierr; 669 670 PetscFunctionBegin; 671 /* get mat flags */ 672 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 673 reuse = MAT_INITIAL_MATRIX; 674 if (pc->setupcalled) { 675 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 676 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 677 if (matstruct == SAME_NONZERO_PATTERN) { 678 reuse = MAT_REUSE_MATRIX; 679 } else { 680 reuse = MAT_INITIAL_MATRIX; 681 } 682 } 683 if (reuse == MAT_INITIAL_MATRIX) { 684 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 689 } 690 691 /* transform local matrices if needed */ 692 if (pcbddc->use_change_of_basis) { 693 Mat change_mat_all; 694 PetscScalar *row_cmat_values; 695 PetscInt *row_cmat_indices; 696 PetscInt *nnz,*is_indices,*temp_indices; 697 PetscInt i,j,k,n_D,n_B; 698 699 /* Get Non-overlapping dimensions */ 700 n_B = pcis->n_B; 701 n_D = pcis->n-n_B; 702 703 /* compute nonzero structure of change of basis on all local nodes */ 704 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 705 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 706 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 707 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 708 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 709 k=1; 710 for (i=0;i<n_B;i++) { 711 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 712 nnz[is_indices[i]]=j; 713 if (k < j) k = j; 714 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 715 } 716 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 /* assemble change of basis matrix on the whole set of local dofs */ 718 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 719 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 720 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 721 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 722 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 723 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 724 for (i=0;i<n_D;i++) { 725 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 726 } 727 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 for (i=0;i<n_B;i++) { 730 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 731 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 732 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 733 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 734 } 735 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 738 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 739 if (i==1) { 740 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 741 } else { 742 Mat work_mat; 743 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 744 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 745 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 746 } 747 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 748 ierr = PetscFree(nnz);CHKERRQ(ierr); 749 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 750 } else { 751 /* without change of basis, the local matrix is unchanged */ 752 if (!pcbddc->local_mat) { 753 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 754 pcbddc->local_mat = matis->A; 755 } 756 } 757 758 /* get submatrices */ 759 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 760 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 761 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 762 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 769 { 770 PC_IS* pcis = (PC_IS*)(pc->data); 771 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 772 IS is_aux1,is_aux2; 773 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 774 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 775 PetscBT bitmask; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 /* Set Non-overlapping dimensions */ 780 n_B = pcis->n_B; n_D = pcis->n - n_B; 781 /* get vertex indices from constraint matrix */ 782 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 783 /* Set number of constraints */ 784 n_constraints = pcbddc->local_primal_size-n_vertices; 785 /* create auxiliary bitmask */ 786 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 787 for (i=0;i<n_vertices;i++) { 788 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 789 } 790 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 791 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 792 for (i=0, n_R=0; i<pcis->n; i++) { 793 if (!PetscBTLookup(bitmask,i)) { 794 idx_R_local[n_R] = i; 795 n_R++; 796 } 797 } 798 ierr = PetscFree(vertices);CHKERRQ(ierr); 799 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 800 801 /* print some info if requested */ 802 if (pcbddc->dbg_flag) { 803 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 810 } 811 812 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 813 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 814 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 815 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 for (i=0; i<n_D; i++) { 817 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 818 } 819 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0, j=0; i<n_R; i++) { 821 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 822 aux_array1[j++] = i; 823 } 824 } 825 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 826 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 827 for (i=0, j=0; i<n_B; i++) { 828 if (!PetscBTLookup(bitmask,is_indices[i])) { 829 aux_array2[j++] = i; 830 } 831 } 832 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 834 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 835 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 836 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 840 for (i=0, j=0; i<n_R; i++) { 841 if (PetscBTLookup(bitmask,idx_R_local[i])) { 842 aux_array1[j++] = i; 843 } 844 } 845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 848 } 849 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 855 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 856 { 857 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 858 859 PetscFunctionBegin; 860 pcbddc->use_exact_dirichlet=use; 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 866 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 867 { 868 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 869 PC_IS *pcis = (PC_IS*)pc->data; 870 PC pc_temp; 871 Mat A_RR; 872 Vec vec1,vec2,vec3; 873 MatStructure matstruct; 874 PetscScalar m_one = -1.0; 875 PetscReal value; 876 PetscInt n_D,n_R,use_exact,use_exact_reduced; 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 /* Creating PC contexts for local Dirichlet and Neumann problems */ 881 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 882 883 /* DIRICHLET PROBLEM */ 884 /* Matrix for Dirichlet problem is pcis->A_II */ 885 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 886 if (!pcbddc->ksp_D) { /* create object if not yet build */ 887 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 888 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 889 /* default */ 890 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 891 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 892 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 893 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 894 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 895 } 896 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 897 /* Allow user's customization */ 898 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 899 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 900 if (!n_D) { 901 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 902 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 903 } 904 /* Set Up KSP for Dirichlet problem of BDDC */ 905 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 906 /* set ksp_D into pcis data */ 907 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 908 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 909 pcis->ksp_D = pcbddc->ksp_D; 910 911 /* NEUMANN PROBLEM */ 912 /* Matrix for Neumann problem is A_RR -> we need to create it */ 913 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 914 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 915 if (!pcbddc->ksp_R) { /* create object if not yet build */ 916 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 917 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 918 /* default */ 919 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 920 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 921 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 922 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 923 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 924 } 925 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 926 /* Allow user's customization */ 927 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 928 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 929 if (!n_R) { 930 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 931 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 932 } 933 /* Set Up KSP for Neumann problem of BDDC */ 934 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 935 936 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 937 938 /* Dirichlet */ 939 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 940 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 941 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 942 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 943 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 944 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 945 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 946 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 947 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 948 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 949 /* need to be adapted? */ 950 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 951 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 952 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 953 /* print info */ 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 960 } 961 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 962 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 963 } 964 965 /* Neumann */ 966 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 967 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 968 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 969 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 970 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 971 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 972 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 973 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 974 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 975 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 976 /* need to be adapted? */ 977 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 978 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 979 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 980 /* print info */ 981 if (pcbddc->dbg_flag) { 982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 984 } 985 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 986 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 987 } 988 989 /* free Neumann problem's matrix */ 990 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 996 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 997 { 998 PetscErrorCode ierr; 999 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1000 1001 PetscFunctionBegin; 1002 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1003 if (pcbddc->local_auxmat1) { 1004 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1005 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1012 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1013 { 1014 PetscErrorCode ierr; 1015 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1016 PC_IS* pcis = (PC_IS*) (pc->data); 1017 const PetscScalar zero = 0.0; 1018 1019 PetscFunctionBegin; 1020 /* Application of PHI^T (or PSI^T) */ 1021 if (pcbddc->coarse_psi_B) { 1022 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1023 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1024 } else { 1025 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1026 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1027 } 1028 /* Scatter data of coarse_rhs */ 1029 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1030 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 1032 /* Local solution on R nodes */ 1033 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1036 if (pcbddc->switch_static) { 1037 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1038 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1039 } 1040 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1041 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1042 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1044 if (pcbddc->switch_static) { 1045 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 } 1048 1049 /* Coarse solution */ 1050 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1052 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1053 } 1054 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1055 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1056 1057 /* Sum contributions from two levels */ 1058 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1059 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1065 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1066 { 1067 PetscErrorCode ierr; 1068 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1069 1070 PetscFunctionBegin; 1071 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1077 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1078 { 1079 PetscErrorCode ierr; 1080 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1081 1082 PetscFunctionBegin; 1083 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* uncomment for testing purposes */ 1088 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1091 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1092 { 1093 PetscErrorCode ierr; 1094 PC_IS* pcis = (PC_IS*)(pc->data); 1095 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1096 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1097 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1098 MatType impMatType=MATSEQAIJ; 1099 /* one and zero */ 1100 PetscScalar one=1.0,zero=0.0; 1101 /* space to store constraints and their local indices */ 1102 PetscScalar *temp_quadrature_constraint; 1103 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1104 /* iterators */ 1105 PetscInt i,j,k,total_counts,temp_start_ptr; 1106 /* stuff to store connected components stored in pcbddc->mat_graph */ 1107 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1108 PetscInt n_ISForFaces,n_ISForEdges; 1109 /* near null space stuff */ 1110 MatNullSpace nearnullsp; 1111 const Vec *nearnullvecs; 1112 Vec *localnearnullsp; 1113 PetscBool nnsp_has_cnst; 1114 PetscInt nnsp_size; 1115 PetscScalar *array; 1116 /* BLAS integers */ 1117 PetscBLASInt lwork,lierr; 1118 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1119 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1120 /* LAPACK working arrays for SVD or POD */ 1121 PetscBool skip_lapack; 1122 PetscScalar *work; 1123 PetscReal *singular_vals; 1124 #if defined(PETSC_USE_COMPLEX) 1125 PetscReal *rwork; 1126 #endif 1127 #if defined(PETSC_MISSING_LAPACK_GESVD) 1128 PetscBLASInt Blas_one_2=1; 1129 PetscScalar *temp_basis,*correlation_mat; 1130 #else 1131 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1132 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1133 #endif 1134 /* change of basis */ 1135 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1136 PetscBool boolforchange; 1137 PetscBT touched,change_basis; 1138 /* auxiliary stuff */ 1139 PetscInt *nnz,*is_indices,*local_to_B; 1140 /* some quantities */ 1141 PetscInt n_vertices,total_primal_vertices; 1142 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1143 1144 1145 PetscFunctionBegin; 1146 /* Get index sets for faces, edges and vertices from graph */ 1147 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1148 pcbddc->use_vertices = PETSC_TRUE; 1149 } 1150 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1151 /* print some info */ 1152 if (pcbddc->dbg_flag) { 1153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1154 i = 0; 1155 if (ISForVertices) { 1156 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1157 } 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1162 } 1163 /* check if near null space is attached to global mat */ 1164 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1165 if (nearnullsp) { 1166 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1167 } else { /* if near null space is not provided BDDC uses constants by default */ 1168 nnsp_size = 0; 1169 nnsp_has_cnst = PETSC_TRUE; 1170 } 1171 /* get max number of constraints on a single cc */ 1172 max_constraints = nnsp_size; 1173 if (nnsp_has_cnst) max_constraints++; 1174 1175 /* 1176 Evaluate maximum storage size needed by the procedure 1177 - temp_indices will contain start index of each constraint stored as follows 1178 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1179 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1180 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1181 */ 1182 total_counts = n_ISForFaces+n_ISForEdges; 1183 total_counts *= max_constraints; 1184 n_vertices = 0; 1185 if (ISForVertices) { 1186 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1187 } 1188 total_counts += n_vertices; 1189 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1190 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1191 total_counts = 0; 1192 max_size_of_constraint = 0; 1193 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1194 if (i<n_ISForEdges) { 1195 used_IS = &ISForEdges[i]; 1196 } else { 1197 used_IS = &ISForFaces[i-n_ISForEdges]; 1198 } 1199 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1200 total_counts += j; 1201 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1202 } 1203 total_counts *= max_constraints; 1204 total_counts += n_vertices; 1205 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1206 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1207 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1208 /* local to boundary numbering */ 1209 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1210 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1211 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1212 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1213 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1214 /* get local part of global near null space vectors */ 1215 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1216 for (k=0;k<nnsp_size;k++) { 1217 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1218 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 } 1221 1222 /* whether or not to skip lapack calls */ 1223 skip_lapack = PETSC_TRUE; 1224 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1225 1226 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1227 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1228 PetscScalar temp_work; 1229 #if defined(PETSC_MISSING_LAPACK_GESVD) 1230 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1231 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1232 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1233 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1234 #if defined(PETSC_USE_COMPLEX) 1235 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1236 #endif 1237 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1238 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1239 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1240 lwork = -1; 1241 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1242 #if !defined(PETSC_USE_COMPLEX) 1243 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1244 #else 1245 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1246 #endif 1247 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1248 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1249 #else /* on missing GESVD */ 1250 /* SVD */ 1251 PetscInt max_n,min_n; 1252 max_n = max_size_of_constraint; 1253 min_n = max_constraints; 1254 if (max_size_of_constraint < max_constraints) { 1255 min_n = max_size_of_constraint; 1256 max_n = max_constraints; 1257 } 1258 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1259 #if defined(PETSC_USE_COMPLEX) 1260 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1261 #endif 1262 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1263 lwork = -1; 1264 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1265 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1266 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1268 #if !defined(PETSC_USE_COMPLEX) 1269 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1270 #else 1271 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1272 #endif 1273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1274 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1275 #endif /* on missing GESVD */ 1276 /* Allocate optimal workspace */ 1277 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1278 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1279 } 1280 /* Now we can loop on constraining sets */ 1281 total_counts = 0; 1282 temp_indices[0] = 0; 1283 /* vertices */ 1284 if (ISForVertices) { 1285 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1286 if (nnsp_has_cnst) { /* consider all vertices */ 1287 for (i=0;i<n_vertices;i++) { 1288 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1289 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1290 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1291 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1292 total_counts++; 1293 } 1294 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1295 PetscBool used_vertex; 1296 for (i=0;i<n_vertices;i++) { 1297 used_vertex = PETSC_FALSE; 1298 k = 0; 1299 while (!used_vertex && k<nnsp_size) { 1300 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1301 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1302 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1303 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1304 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1305 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1306 total_counts++; 1307 used_vertex = PETSC_TRUE; 1308 } 1309 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1310 k++; 1311 } 1312 } 1313 } 1314 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 n_vertices = total_counts; 1316 } 1317 1318 /* edges and faces */ 1319 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1320 if (i<n_ISForEdges) { 1321 used_IS = &ISForEdges[i]; 1322 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1323 } else { 1324 used_IS = &ISForFaces[i-n_ISForEdges]; 1325 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1326 } 1327 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1328 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1329 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1330 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1331 /* change of basis should not be performed on local periodic nodes */ 1332 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1333 if (nnsp_has_cnst) { 1334 PetscScalar quad_value; 1335 temp_constraints++; 1336 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1337 for (j=0;j<size_of_constraint;j++) { 1338 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1339 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1340 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1341 } 1342 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1343 if (boolforchange) { 1344 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1345 } 1346 total_counts++; 1347 } 1348 for (k=0;k<nnsp_size;k++) { 1349 PetscReal real_value; 1350 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1351 for (j=0;j<size_of_constraint;j++) { 1352 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1353 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1354 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1355 } 1356 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1357 /* check if array is null on the connected component */ 1358 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1359 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1360 if (real_value > 0.0) { /* keep indices and values */ 1361 temp_constraints++; 1362 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1363 if (boolforchange) { 1364 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1365 } 1366 total_counts++; 1367 } 1368 } 1369 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1370 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1371 if (!pcbddc->use_nnsp_true) { 1372 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1373 1374 #if defined(PETSC_MISSING_LAPACK_GESVD) 1375 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1376 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1377 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1378 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1379 from that computed using LAPACKgesvd 1380 -> This is due to a different computation of eigenvectors in LAPACKheev 1381 -> The quality of the POD-computed basis will be the same */ 1382 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1383 /* Store upper triangular part of correlation matrix */ 1384 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1385 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1386 for (j=0;j<temp_constraints;j++) { 1387 for (k=0;k<j+1;k++) { 1388 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1389 } 1390 } 1391 /* compute eigenvalues and eigenvectors of correlation matrix */ 1392 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1393 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1394 #if !defined(PETSC_USE_COMPLEX) 1395 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1396 #else 1397 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1398 #endif 1399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1400 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1401 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1402 j=0; 1403 while (j < temp_constraints && singular_vals[j] < tol) j++; 1404 total_counts=total_counts-j; 1405 /* scale and copy POD basis into used quadrature memory */ 1406 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1407 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1408 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1410 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1412 if (j<temp_constraints) { 1413 PetscInt ii; 1414 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1415 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1416 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1417 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1418 for (k=0;k<temp_constraints-j;k++) { 1419 for (ii=0;ii<size_of_constraint;ii++) { 1420 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1421 } 1422 } 1423 } 1424 #else /* on missing GESVD */ 1425 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1426 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1427 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1428 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1429 #if !defined(PETSC_USE_COMPLEX) 1430 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1431 #else 1432 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1433 #endif 1434 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1435 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1436 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1437 k = temp_constraints; 1438 if (k > size_of_constraint) k = size_of_constraint; 1439 j = 0; 1440 while (j < k && singular_vals[k-j-1] < tol) j++; 1441 total_counts = total_counts-temp_constraints+k-j; 1442 #endif /* on missing GESVD */ 1443 } 1444 } 1445 /* free index sets of faces, edges and vertices */ 1446 for (i=0;i<n_ISForFaces;i++) { 1447 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1448 } 1449 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1450 for (i=0;i<n_ISForEdges;i++) { 1451 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1452 } 1453 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1454 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1455 1456 /* free workspace */ 1457 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1458 ierr = PetscFree(work);CHKERRQ(ierr); 1459 #if defined(PETSC_USE_COMPLEX) 1460 ierr = PetscFree(rwork);CHKERRQ(ierr); 1461 #endif 1462 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1463 #if defined(PETSC_MISSING_LAPACK_GESVD) 1464 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1465 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1466 #endif 1467 } 1468 for (k=0;k<nnsp_size;k++) { 1469 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1470 } 1471 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1472 1473 /* set quantities in pcbddc data structure */ 1474 /* n_vertices defines the number of subdomain corners in the primal space */ 1475 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1476 pcbddc->local_primal_size = total_counts; 1477 pcbddc->n_vertices = n_vertices; 1478 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1479 1480 /* Create constraint matrix */ 1481 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1482 /* so we need to set it up properly either with or without change of basis */ 1483 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1484 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1485 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1486 /* array to compute a local numbering of constraints : vertices first then constraints */ 1487 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1488 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1489 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1490 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1491 /* auxiliary stuff for basis change */ 1492 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1493 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1494 1495 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1496 total_primal_vertices=0; 1497 for (i=0;i<pcbddc->local_primal_size;i++) { 1498 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1499 if (size_of_constraint == 1) { 1500 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1501 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1502 aux_primal_minloc[total_primal_vertices]=0; 1503 total_primal_vertices++; 1504 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1505 PetscInt min_loc,min_index; 1506 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1507 /* find first untouched local node */ 1508 k = 0; 1509 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1510 min_index = global_indices[k]; 1511 min_loc = k; 1512 /* search the minimum among global nodes already untouched on the cc */ 1513 for (k=1;k<size_of_constraint;k++) { 1514 /* there can be more than one constraint on a single connected component */ 1515 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1516 min_index = global_indices[k]; 1517 min_loc = k; 1518 } 1519 } 1520 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1521 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1522 aux_primal_minloc[total_primal_vertices]=min_loc; 1523 total_primal_vertices++; 1524 } 1525 } 1526 /* free workspace */ 1527 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1528 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1529 /* permute indices in order to have a sorted set of vertices */ 1530 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1531 1532 /* nonzero structure of constraint matrix */ 1533 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1534 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1535 j=total_primal_vertices; 1536 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1537 if (!PetscBTLookup(change_basis,i)) { 1538 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1539 j++; 1540 } 1541 } 1542 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1543 ierr = PetscFree(nnz);CHKERRQ(ierr); 1544 /* set values in constraint matrix */ 1545 for (i=0;i<total_primal_vertices;i++) { 1546 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1547 } 1548 total_counts = total_primal_vertices; 1549 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1550 if (!PetscBTLookup(change_basis,i)) { 1551 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1552 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1553 total_counts++; 1554 } 1555 } 1556 /* assembling */ 1557 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1558 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1559 /* 1560 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1561 */ 1562 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1563 if (pcbddc->use_change_of_basis) { 1564 PetscBool qr_needed = PETSC_FALSE; 1565 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1566 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1567 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1568 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1569 /* work arrays */ 1570 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1571 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1572 /* nonzeros per row */ 1573 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1574 if (PetscBTLookup(change_basis,i)) { 1575 qr_needed = PETSC_TRUE; 1576 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1577 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1578 } 1579 } 1580 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1581 ierr = PetscFree(nnz);CHKERRQ(ierr); 1582 /* Set initial identity in the matrix */ 1583 for (i=0;i<pcis->n_B;i++) { 1584 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1585 } 1586 1587 /* Now we loop on the constraints which need a change of basis */ 1588 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1589 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1590 /* Change of basis matrix T computed via QR decomposition of constraints */ 1591 if (qr_needed) { 1592 /* dual and primal dofs on a single cc */ 1593 PetscInt dual_dofs,primal_dofs; 1594 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1595 PetscInt primal_counter; 1596 /* working stuff for GEQRF */ 1597 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1598 PetscBLASInt lqr_work; 1599 /* working stuff for UNGQR */ 1600 PetscScalar *gqr_work,lgqr_work_t; 1601 PetscBLASInt lgqr_work; 1602 /* working stuff for TRTRS */ 1603 PetscScalar *trs_rhs; 1604 PetscBLASInt Blas_NRHS; 1605 /* pointers for values insertion into change of basis matrix */ 1606 PetscInt *start_rows,*start_cols; 1607 PetscScalar *start_vals; 1608 /* working stuff for values insertion */ 1609 PetscBT is_primal; 1610 1611 /* space to store Q */ 1612 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1613 /* first we issue queries for optimal work */ 1614 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1615 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1616 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1617 lqr_work = -1; 1618 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1619 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1620 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1621 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1622 lgqr_work = -1; 1623 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1627 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1628 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1629 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1630 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1631 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1632 /* array to store scaling factors for reflectors */ 1633 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1634 /* array to store rhs and solution of triangular solver */ 1635 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1636 /* array to store whether a node is primal or not */ 1637 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1638 for (i=0;i<total_primal_vertices;i++) { 1639 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1640 } 1641 1642 /* allocating workspace for check */ 1643 if (pcbddc->dbg_flag) { 1644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1646 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1647 } 1648 1649 /* loop on constraints and see whether or not they need a change of basis */ 1650 /* -> using implicit ordering contained in temp_indices data */ 1651 total_counts = pcbddc->n_vertices; 1652 primal_counter = total_counts; 1653 while (total_counts<pcbddc->local_primal_size) { 1654 primal_dofs = 1; 1655 if (PetscBTLookup(change_basis,total_counts)) { 1656 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1657 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1658 primal_dofs++; 1659 } 1660 /* get constraint info */ 1661 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1662 dual_dofs = size_of_constraint-primal_dofs; 1663 1664 /* copy quadrature constraints for change of basis check */ 1665 if (pcbddc->dbg_flag) { 1666 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1667 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1668 } 1669 1670 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1671 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1672 1673 /* compute QR decomposition of constraints */ 1674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1675 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1676 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1677 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1678 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1679 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1680 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1681 1682 /* explictly compute R^-T */ 1683 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1684 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1685 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1689 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1690 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1691 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1692 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1693 1694 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1695 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1696 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1697 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1699 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1700 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1701 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1703 1704 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1705 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1706 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1709 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1711 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1713 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1714 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1715 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1716 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1717 1718 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1719 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1720 /* insert cols for primal dofs */ 1721 for (j=0;j<primal_dofs;j++) { 1722 start_vals = &qr_basis[j*size_of_constraint]; 1723 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1724 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1725 } 1726 /* insert cols for dual dofs */ 1727 for (j=0,k=0;j<dual_dofs;k++) { 1728 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1729 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1730 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1731 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1732 j++; 1733 } 1734 } 1735 1736 /* check change of basis */ 1737 if (pcbddc->dbg_flag) { 1738 PetscInt ii,jj; 1739 PetscBool valid_qr=PETSC_TRUE; 1740 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1741 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1743 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1745 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1746 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1747 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1748 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1749 for (jj=0;jj<size_of_constraint;jj++) { 1750 for (ii=0;ii<primal_dofs;ii++) { 1751 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1752 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1753 } 1754 } 1755 if (!valid_qr) { 1756 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1757 for (jj=0;jj<size_of_constraint;jj++) { 1758 for (ii=0;ii<primal_dofs;ii++) { 1759 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1760 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1761 } 1762 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1763 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1764 } 1765 } 1766 } 1767 } else { 1768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1769 } 1770 } 1771 /* increment primal counter */ 1772 primal_counter += primal_dofs; 1773 } else { 1774 if (pcbddc->dbg_flag) { 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1776 } 1777 } 1778 /* increment constraint counter total_counts */ 1779 total_counts += primal_dofs; 1780 } 1781 if (pcbddc->dbg_flag) { 1782 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1783 ierr = PetscFree(work);CHKERRQ(ierr); 1784 } 1785 /* free workspace */ 1786 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1787 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1788 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1789 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1790 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1791 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1792 } 1793 /* assembling */ 1794 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 /* 1797 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1798 */ 1799 } 1800 /* free workspace */ 1801 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1802 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1803 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1804 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1805 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1806 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1807 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1808 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1814 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1815 { 1816 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1817 PC_IS *pcis = (PC_IS*)pc->data; 1818 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1819 PetscInt bs,ierr,i,vertex_size; 1820 PetscViewer viewer=pcbddc->dbg_viewer; 1821 1822 PetscFunctionBegin; 1823 /* Init local Graph struct */ 1824 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1825 1826 /* Check validity of the csr graph passed in by the user */ 1827 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1828 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1829 } 1830 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1831 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1832 Mat mat_adj; 1833 const PetscInt *xadj,*adjncy; 1834 PetscBool flg_row=PETSC_TRUE; 1835 1836 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1837 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1838 if (!flg_row) { 1839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1840 } 1841 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1842 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1843 if (!flg_row) { 1844 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1845 } 1846 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1847 } 1848 1849 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1850 vertex_size = 1; 1851 if (!pcbddc->n_ISForDofs) { 1852 IS *custom_ISForDofs; 1853 1854 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1855 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1856 for (i=0;i<bs;i++) { 1857 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1858 } 1859 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1860 /* remove my references to IS objects */ 1861 for (i=0;i<bs;i++) { 1862 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1863 } 1864 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1865 } else { /* mat block size as vertex size (used for elasticity) */ 1866 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1867 } 1868 1869 /* Setup of Graph */ 1870 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1871 1872 /* Graph's connected components analysis */ 1873 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1874 1875 /* print some info to stdout */ 1876 if (pcbddc->dbg_flag) { 1877 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1878 } 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1884 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1885 { 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 n = 0; 1892 vertices = 0; 1893 if (pcbddc->ConstraintMatrix) { 1894 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1895 for (i=0;i<local_primal_size;i++) { 1896 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1897 if (size_of_constraint == 1) n++; 1898 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1899 } 1900 if (vertices_idx) { 1901 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1902 n = 0; 1903 for (i=0;i<local_primal_size;i++) { 1904 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1905 if (size_of_constraint == 1) { 1906 vertices[n++]=row_cmat_indices[0]; 1907 } 1908 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1909 } 1910 } 1911 } 1912 *n_vertices = n; 1913 if (vertices_idx) *vertices_idx = vertices; 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1919 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1920 { 1921 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1922 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1923 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1924 PetscBT touched; 1925 PetscErrorCode ierr; 1926 1927 /* This function assumes that the number of local constraints per connected component 1928 is not greater than the number of nodes defined for the connected component 1929 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1930 PetscFunctionBegin; 1931 n = 0; 1932 constraints_index = 0; 1933 if (pcbddc->ConstraintMatrix) { 1934 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1935 max_size_of_constraint = 0; 1936 for (i=0;i<local_primal_size;i++) { 1937 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1938 if (size_of_constraint > 1) { 1939 n++; 1940 } 1941 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1942 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1943 } 1944 if (constraints_idx) { 1945 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1946 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1947 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1948 n = 0; 1949 for (i=0;i<local_primal_size;i++) { 1950 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1951 if (size_of_constraint > 1) { 1952 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1953 /* find first untouched local node */ 1954 j = 0; 1955 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1956 min_index = row_cmat_global_indices[j]; 1957 min_loc = j; 1958 /* search the minimum among nodes not yet touched on the connected component 1959 since there can be more than one constraint on a single cc */ 1960 for (j=1;j<size_of_constraint;j++) { 1961 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1962 min_index = row_cmat_global_indices[j]; 1963 min_loc = j; 1964 } 1965 } 1966 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1967 constraints_index[n++] = row_cmat_indices[min_loc]; 1968 } 1969 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1970 } 1971 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1972 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1973 } 1974 } 1975 *n_constraints = n; 1976 if (constraints_idx) *constraints_idx = constraints_index; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* the next two functions has been adapted from pcis.c */ 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "PCBDDCApplySchur" 1983 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 1984 { 1985 PetscErrorCode ierr; 1986 PC_IS *pcis = (PC_IS*)(pc->data); 1987 1988 PetscFunctionBegin; 1989 if (!vec2_B) { vec2_B = v; } 1990 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 1991 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 1992 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 1993 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 1994 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2000 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2001 { 2002 PetscErrorCode ierr; 2003 PC_IS *pcis = (PC_IS*)(pc->data); 2004 2005 PetscFunctionBegin; 2006 if (!vec2_B) { vec2_B = v; } 2007 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2008 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2009 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2010 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2011 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "PCBDDCSubsetNumbering" 2017 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2018 { 2019 Vec local_vec,global_vec; 2020 IS seqis,paris; 2021 VecScatter scatter_ctx; 2022 PetscScalar *array; 2023 PetscInt *temp_global_dofs; 2024 PetscScalar globalsum; 2025 PetscInt i,j,s; 2026 PetscInt nlocals,first_index,old_index,max_local; 2027 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2028 PetscMPIInt *dof_sizes,*dof_displs; 2029 PetscBool first_found; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 /* mpi buffers */ 2034 MPI_Comm_size(comm,&size_prec_comm); 2035 MPI_Comm_rank(comm,&rank_prec_comm); 2036 j = ( !rank_prec_comm ? size_prec_comm : 0); 2037 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2038 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2039 /* get maximum size of subset */ 2040 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2041 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2042 max_local = 0; 2043 if (n_local_dofs) { 2044 max_local = temp_global_dofs[0]; 2045 for (i=1;i<n_local_dofs;i++) { 2046 if (max_local < temp_global_dofs[i] ) { 2047 max_local = temp_global_dofs[i]; 2048 } 2049 } 2050 } 2051 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2052 max_global++; 2053 max_local = 0; 2054 if (n_local_dofs) { 2055 max_local = local_dofs[0]; 2056 for (i=1;i<n_local_dofs;i++) { 2057 if (max_local < local_dofs[i] ) { 2058 max_local = local_dofs[i]; 2059 } 2060 } 2061 } 2062 max_local++; 2063 /* allocate workspace */ 2064 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2065 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2066 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2067 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2068 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2069 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2070 /* create scatter */ 2071 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2072 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2073 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2074 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2075 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2076 /* init array */ 2077 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2078 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2079 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2080 if (local_dofs_mult) { 2081 for (i=0;i<n_local_dofs;i++) { 2082 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2083 } 2084 } else { 2085 for (i=0;i<n_local_dofs;i++) { 2086 array[local_dofs[i]]=1.0; 2087 } 2088 } 2089 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2090 /* scatter into global vec and get total number of global dofs */ 2091 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2092 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2094 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2095 /* Fill global_vec with cumulative function for global numbering */ 2096 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2097 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2098 nlocals = 0; 2099 first_index = -1; 2100 first_found = PETSC_FALSE; 2101 for (i=0;i<s;i++) { 2102 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2103 first_found = PETSC_TRUE; 2104 first_index = i; 2105 } 2106 nlocals += (PetscInt)PetscRealPart(array[i]); 2107 } 2108 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2109 if (!rank_prec_comm) { 2110 dof_displs[0]=0; 2111 for (i=1;i<size_prec_comm;i++) { 2112 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2113 } 2114 } 2115 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2116 if (first_found) { 2117 array[first_index] += (PetscScalar)nlocals; 2118 old_index = first_index; 2119 for (i=first_index+1;i<s;i++) { 2120 if (PetscRealPart(array[i]) > 0.0) { 2121 array[i] += array[old_index]; 2122 old_index = i; 2123 } 2124 } 2125 } 2126 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2127 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2128 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2129 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2130 /* get global ordering of local dofs */ 2131 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2132 if (local_dofs_mult) { 2133 for (i=0;i<n_local_dofs;i++) { 2134 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2135 } 2136 } else { 2137 for (i=0;i<n_local_dofs;i++) { 2138 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2139 } 2140 } 2141 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2142 /* free workspace */ 2143 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2144 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2145 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2146 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2147 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2148 /* return pointer to global ordering of local dofs */ 2149 *global_numbering_subset = temp_global_dofs; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2155 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2156 { 2157 PetscInt i,j; 2158 PetscScalar *alphas; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 /* this implements stabilized Gram-Schmidt */ 2163 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2164 for (i=0;i<n;i++) { 2165 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2166 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2167 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2168 } 2169 ierr = PetscFree(alphas);CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatConvert_IS_AIJ" 2176 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2177 { 2178 Mat new_mat; 2179 Mat_IS *matis = (Mat_IS*)(mat->data); 2180 /* info on mat */ 2181 PetscInt bs,rows,cols; 2182 PetscInt lrows,lcols; 2183 PetscInt local_rows,local_cols; 2184 PetscMPIInt nsubdomains; 2185 /* preallocation */ 2186 Vec vec_dnz,vec_onz; 2187 PetscScalar *my_dnz,*my_onz,*array; 2188 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2189 PetscInt index_row,index_col,owner; 2190 PetscInt *local_indices,*global_indices; 2191 /* work */ 2192 PetscInt i,j; 2193 PetscErrorCode ierr; 2194 2195 PetscFunctionBegin; 2196 /* CHECKS */ 2197 /* get info from mat */ 2198 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2199 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2200 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2201 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2202 2203 /* MAT_INITIAL_MATRIX */ 2204 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2205 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2206 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2207 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2208 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2209 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2210 2211 /* 2212 preallocation 2213 */ 2214 2215 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2216 /* 2217 Some vectors are needed to sum up properly on shared interface dofs. 2218 Note that preallocation is not exact, since it overestimates nonzeros 2219 */ 2220 /* 2221 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2222 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2223 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2224 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2225 */ 2226 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2227 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2228 /* All processes need to compute entire row ownership */ 2229 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2230 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2231 for (i=0;i<nsubdomains;i++) { 2232 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2233 row_ownership[j]=i; 2234 } 2235 } 2236 /* map indices of local mat to global values */ 2237 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2238 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2239 for (i=0;i<local_rows;i++) local_indices[i]=i; 2240 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2241 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2242 2243 /* 2244 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2245 then, they will be summed up properly. This way, preallocation is always sufficient 2246 */ 2247 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2248 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2249 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2250 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2251 for (i=0;i<local_rows;i++) { 2252 index_row = global_indices[i]; 2253 for (j=i;j<local_rows;j++) { 2254 owner = row_ownership[index_row]; 2255 index_col = global_indices[j]; 2256 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2257 my_dnz[i] += 1.0; 2258 } else { /* offdiag block */ 2259 my_onz[i] += 1.0; 2260 } 2261 /* same as before, interchanging rows and cols */ 2262 if (i != j) { 2263 owner = row_ownership[index_col]; 2264 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2265 my_dnz[j] += 1.0; 2266 } else { 2267 my_onz[j] += 1.0; 2268 } 2269 } 2270 } 2271 } 2272 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2273 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2274 if (local_rows) { /* multilevel guard */ 2275 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2276 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2277 } 2278 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2279 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2280 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2281 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2282 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2283 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2284 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2285 2286 /* set computed preallocation in dnz and onz */ 2287 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2288 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2289 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2290 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2291 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2292 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2293 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2294 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2295 2296 /* Resize preallocation if overestimated */ 2297 for (i=0;i<lrows;i++) { 2298 dnz[i] = PetscMin(dnz[i],lcols); 2299 onz[i] = PetscMin(onz[i],cols-lcols); 2300 } 2301 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2303 2304 /* 2305 Set values. Very Basic. 2306 */ 2307 for (i=0;i<local_rows;i++) { 2308 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2309 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2310 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2311 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2312 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2313 } 2314 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2315 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2316 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 2318 /* get back mat */ 2319 *M = new_mat; 2320 PetscFunctionReturn(0); 2321 } 2322 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatISSubassemble_Private" 2325 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2326 { 2327 Mat subdomain_adj; 2328 IS new_ranks,ranks_send_to; 2329 MatPartitioning partitioner; 2330 Mat_IS *matis; 2331 PetscInt n_neighs,*neighs,*n_shared,**shared; 2332 PetscInt prank; 2333 PetscMPIInt size,rank,color; 2334 PetscInt *xadj,*adjncy,*oldranks; 2335 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2336 PetscInt i,j,n_subdomains,local_size,threshold=0; 2337 PetscErrorCode ierr; 2338 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2339 PetscSubcomm subcomm; 2340 2341 PetscFunctionBegin; 2342 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2343 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2344 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2345 2346 /* Get info on mapping */ 2347 matis = (Mat_IS*)(mat->data); 2348 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2349 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2350 2351 /* build local CSR graph of subdomains' connectivity */ 2352 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2353 xadj[0] = 0; 2354 xadj[1] = PetscMax(n_neighs-1,0); 2355 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2356 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2357 2358 if (threshold) { 2359 PetscInt* count,min_threshold; 2360 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2361 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2362 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2363 for (j=0;j<n_shared[i];j++) { 2364 count[shared[i][j]] += 1; 2365 } 2366 } 2367 /* adapt threshold since we dont want to lose significant connections */ 2368 min_threshold = n_neighs; 2369 for (i=1;i<n_neighs;i++) { 2370 for (j=0;j<n_shared[i];j++) { 2371 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2372 } 2373 } 2374 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2375 threshold = PetscMax(min_threshold+1,threshold); 2376 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2377 xadj[1] = 0; 2378 for (i=1;i<n_neighs;i++) { 2379 for (j=0;j<n_shared[i];j++) { 2380 if (count[shared[i][j]] < threshold) { 2381 adjncy[xadj[1]] = neighs[i]; 2382 adjncy_wgt[xadj[1]] = n_shared[i]; 2383 xadj[1]++; 2384 break; 2385 } 2386 } 2387 } 2388 ierr = PetscFree(count);CHKERRQ(ierr); 2389 } else { 2390 if (xadj[1]) { 2391 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2392 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2393 } 2394 } 2395 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2396 if (use_square) { 2397 for (i=0;i<xadj[1];i++) { 2398 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2399 } 2400 } 2401 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2402 2403 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2404 2405 /* 2406 Restrict work on active processes only. 2407 */ 2408 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2409 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2410 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2411 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2412 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2413 if (color) { 2414 ierr = PetscFree(xadj);CHKERRQ(ierr); 2415 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2416 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2417 } else { 2418 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2419 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2420 prank = rank; 2421 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2422 for (i=0;i<size;i++) { 2423 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2424 } 2425 for (i=0;i<xadj[1];i++) { 2426 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2427 } 2428 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2429 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2430 n_subdomains = (PetscInt)size; 2431 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2432 2433 /* Partition */ 2434 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2435 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2436 if (use_vwgt) { 2437 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2438 v_wgt[0] = local_size; 2439 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2440 } 2441 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2442 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2443 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2444 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2445 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2446 ierr = ISView(new_ranks,0);CHKERRQ(ierr); 2447 2448 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2449 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2450 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2451 /* clean up */ 2452 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2453 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2454 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2455 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2456 } 2457 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2458 2459 /* assemble parallel IS for sends */ 2460 i = 1; 2461 if (color) i=0; 2462 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2463 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2464 2465 /* get back IS */ 2466 *is_sends = ranks_send_to; 2467 PetscFunctionReturn(0); 2468 } 2469 2470 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatISSubassemble" 2474 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2475 { 2476 Mat local_mat,new_mat; 2477 Mat_IS *matis; 2478 IS is_sends_internal; 2479 PetscInt rows,cols; 2480 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2481 PetscBool ismatis,isdense; 2482 ISLocalToGlobalMapping l2gmap; 2483 PetscInt* l2gmap_indices; 2484 const PetscInt* is_indices; 2485 MatType new_local_type; 2486 MatTypePrivate new_local_type_private; 2487 /* buffers */ 2488 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2489 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2490 /* MPI */ 2491 MPI_Comm comm; 2492 PetscMPIInt n_sends,n_recvs,commsize; 2493 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2494 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2495 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2496 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 /* checks */ 2501 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2502 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2503 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2504 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2505 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2506 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2507 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2508 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2509 PetscValidLogicalCollectiveInt(mat,bs,0); 2510 /* prepare IS for sending if not provided */ 2511 if (!is_sends) { 2512 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2513 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2514 } else { 2515 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2516 is_sends_internal = is_sends; 2517 } 2518 2519 /* get pointer of MATIS data */ 2520 matis = (Mat_IS*)mat->data; 2521 2522 /* get comm */ 2523 comm = PetscObjectComm((PetscObject)mat); 2524 2525 /* compute number of sends */ 2526 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2527 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2528 2529 /* compute number of receives */ 2530 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2531 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2532 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2533 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2534 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2535 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2536 ierr = PetscFree(iflags);CHKERRQ(ierr); 2537 2538 /* prepare send/receive buffers */ 2539 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2540 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2541 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2542 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2543 2544 /* Get data from local mat */ 2545 if (!isdense) { 2546 /* TODO: See below some guidelines on how to prepare the local buffers */ 2547 /* 2548 send_buffer_vals should contain the raw values of the local matrix 2549 send_buffer_idxs should contain: 2550 - MatType_PRIVATE type 2551 - PetscInt size_of_l2gmap 2552 - PetscInt global_row_indices[size_of_l2gmap] 2553 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2554 */ 2555 } else { 2556 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2557 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2558 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2559 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2560 send_buffer_idxs[1] = i; 2561 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2562 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2563 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2564 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2565 for (i=0;i<n_sends;i++) { 2566 ilengths_vals[is_indices[i]] = len*len; 2567 ilengths_idxs[is_indices[i]] = len+2; 2568 } 2569 } 2570 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2571 buf_size_idxs = 0; 2572 buf_size_vals = 0; 2573 for (i=0;i<n_recvs;i++) { 2574 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2575 buf_size_vals += (PetscInt)olengths_vals[i]; 2576 } 2577 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2578 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2579 2580 /* get new tags for clean communications */ 2581 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2582 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2583 2584 /* allocate for requests */ 2585 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2586 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2587 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2588 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2589 2590 /* communications */ 2591 ptr_idxs = recv_buffer_idxs; 2592 ptr_vals = recv_buffer_vals; 2593 for (i=0;i<n_recvs;i++) { 2594 source_dest = onodes[i]; 2595 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2596 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2597 ptr_idxs += olengths_idxs[i]; 2598 ptr_vals += olengths_vals[i]; 2599 } 2600 for (i=0;i<n_sends;i++) { 2601 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2602 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2603 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2604 } 2605 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2606 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2607 2608 /* assemble new l2g map */ 2609 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2610 ptr_idxs = recv_buffer_idxs; 2611 buf_size_idxs = 0; 2612 for (i=0;i<n_recvs;i++) { 2613 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2614 ptr_idxs += olengths_idxs[i]; 2615 } 2616 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2617 ptr_idxs = recv_buffer_idxs; 2618 buf_size_idxs = 0; 2619 for (i=0;i<n_recvs;i++) { 2620 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2621 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2622 ptr_idxs += olengths_idxs[i]; 2623 } 2624 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2625 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2626 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2627 2628 /* infer new local matrix type from received local matrices type */ 2629 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2630 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2631 new_local_type_private = MATAIJ_PRIVATE; 2632 new_local_type = MATSEQAIJ; 2633 if (n_recvs) { 2634 new_local_type_private = send_buffer_idxs[0]; 2635 ptr_idxs = recv_buffer_idxs; 2636 for (i=0;i<n_recvs;i++) { 2637 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2638 new_local_type_private = MATAIJ_PRIVATE; 2639 break; 2640 } 2641 ptr_idxs += olengths_idxs[i]; 2642 } 2643 switch (new_local_type_private) { 2644 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2645 new_local_type = MATSEQAIJ; 2646 bs = 1; 2647 break; 2648 case MATAIJ_PRIVATE: 2649 new_local_type = MATSEQAIJ; 2650 bs = 1; 2651 break; 2652 case MATBAIJ_PRIVATE: 2653 new_local_type = MATSEQBAIJ; 2654 break; 2655 case MATSBAIJ_PRIVATE: 2656 new_local_type = MATSEQSBAIJ; 2657 break; 2658 default: 2659 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2660 break; 2661 } 2662 } 2663 2664 /* create MATIS object */ 2665 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2666 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2667 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2668 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2669 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2670 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2671 2672 /* set values */ 2673 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2674 ptr_vals = recv_buffer_vals; 2675 ptr_idxs = recv_buffer_idxs; 2676 for (i=0;i<n_recvs;i++) { 2677 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2678 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2679 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2680 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2683 } 2684 ptr_idxs += olengths_idxs[i]; 2685 ptr_vals += olengths_vals[i]; 2686 } 2687 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2688 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2689 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 2692 { /* check */ 2693 Vec lvec,rvec; 2694 PetscReal infty_error; 2695 2696 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2697 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2698 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2699 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2700 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2701 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2702 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2703 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2704 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2705 } 2706 2707 /* free workspace */ 2708 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2709 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2710 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2711 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2712 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2713 if (isdense) { 2714 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2715 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2716 } else { 2717 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2718 } 2719 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2720 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2721 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2722 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2723 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2724 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2725 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2726 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2727 ierr = PetscFree(onodes);CHKERRQ(ierr); 2728 /* get back new mat */ 2729 *mat_n = new_mat; 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2735 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2736 { 2737 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2738 PC_IS *pcis = (PC_IS*)pc->data; 2739 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2740 MatNullSpace CoarseNullSpace=NULL; 2741 ISLocalToGlobalMapping coarse_islg; 2742 IS coarse_is; 2743 PetscInt coarse_size,*local_primal_indices=NULL; 2744 PCType coarse_pc_type; 2745 KSPType coarse_ksp_type; 2746 PetscBool use_multilevel,setsym,issym; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 /* Assign global numbering to coarse dofs */ 2751 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2752 2753 /* adapt coarse problem type if needed */ 2754 use_multilevel = PETSC_FALSE; 2755 if (pcbddc->current_level < pcbddc->max_levels) { 2756 PetscInt im_active,active_procs; 2757 /* count "active processes" */ 2758 im_active = !!(pcis->n); 2759 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2760 if (active_procs/pcbddc->coarsening_ratio < 2) { 2761 if (pcbddc->dbg_flag) { 2762 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 } 2765 } else { 2766 use_multilevel = PETSC_TRUE; 2767 } 2768 } else { 2769 if (pcbddc->dbg_flag) { 2770 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 2771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2772 } 2773 } 2774 2775 /* creates MATIS object for coarse matrix */ 2776 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2777 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2778 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2779 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2780 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2781 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 2784 /* Convert or subassemble MATIS depending on chosen coarse solver approach */ 2785 if (use_multilevel) { 2786 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2787 /* just for now */ 2788 coarse_pc_type = PCBDDC; 2789 coarse_ksp_type = KSPRICHARDSON; 2790 } else { 2791 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2792 /* just for now */ 2793 coarse_pc_type = PCREDUNDANT; 2794 coarse_ksp_type = KSPPREONLY; 2795 } 2796 2797 /* propagate symmetry info to coarse matrix */ 2798 issym = PETSC_FALSE; 2799 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2800 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2801 2802 /* create local to global scatters coarse problem */ 2803 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2804 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2805 2806 /* free memory no longer needed */ 2807 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2808 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2809 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2810 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2811 2812 /* Compute coarse null space */ 2813 if (pcbddc->NullSpace) { 2814 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2815 } 2816 2817 /* KSP for coarse problem */ 2818 { 2819 PC pc_temp; 2820 PetscBool isbddc,isnn; 2821 PetscInt max_it; 2822 MatStructure matstruct; 2823 2824 if (!pcbddc->coarse_ksp) { /* create the coarse KSP once */ 2825 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2826 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2827 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2828 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2829 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2830 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2831 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2832 } 2833 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 2834 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2835 /* Allow user's customization */ 2836 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2837 /* Propagate BDDC info to next level */ 2838 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2839 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2840 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2841 if (CoarseNullSpace) { 2842 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2843 } 2844 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2845 /* hack! TODO: MOVE KSP init before matrix assembly */ 2846 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2847 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2848 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2849 if (!isbddc && !isnn) { 2850 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 2851 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2852 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2853 } 2854 if (!isbddc && CoarseNullSpace) { 2855 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2856 } 2857 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2858 2859 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2860 if (max_it < 3) { 2861 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2862 if (isbddc) { /* workaround: TODO see if it could be avoided */ 2863 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 2864 } 2865 } 2866 } 2867 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2868 2869 /* Check coarse problem if requested */ 2870 if (pcbddc->dbg_flag) { 2871 KSP check_ksp; 2872 KSPType check_ksp_type; 2873 PC check_pc; 2874 Vec check_vec; 2875 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 2876 PetscInt its; 2877 PetscBool ispreonly; 2878 2879 /* Create ksp object suitable for estimation of extreme eigenvalues */ 2880 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr); 2881 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2882 ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-12,1000,coarse_size);CHKERRQ(ierr); 2883 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 2884 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 2885 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 2886 if (ispreonly) { 2887 check_ksp_type = KSPPREONLY; 2888 } else { 2889 if (issym) check_ksp_type = KSPCG; 2890 else check_ksp_type = KSPGMRES; 2891 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 2892 } 2893 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 2894 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 2895 /* create random vec */ 2896 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 2897 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 2898 if (CoarseNullSpace) { 2899 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 2900 } 2901 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2902 /* solve coarse problem */ 2903 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2904 if (CoarseNullSpace) { 2905 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 2906 } 2907 /* check coarse problem residual error */ 2908 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 2909 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2910 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2911 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 2912 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 2913 /* get eigenvalue estimation if inexact */ 2914 if (!ispreonly) { 2915 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2916 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 2917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): % 1.14e %1.14e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 2918 } 2919 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 2920 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 2921 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2922 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 2923 } 2924 /* free memory */ 2925 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 2926 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 2927 PetscFunctionReturn(0); 2928 } 2929 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 2932 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 2933 { 2934 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2935 PC_IS* pcis = (PC_IS*)pc->data; 2936 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2937 PetscInt i,j,coarse_size; 2938 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 /* get indices in local ordering for vertices and constraints */ 2943 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2944 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2945 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2946 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2947 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2948 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2949 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2950 2951 /* Compute global number of coarse dofs */ 2952 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2953 2954 /* check numbering */ 2955 if (pcbddc->dbg_flag) { 2956 PetscScalar coarsesum,*array; 2957 2958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2959 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2960 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 2961 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2962 for (i=0;i<pcbddc->local_primal_size;i++) { 2963 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2964 } 2965 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2966 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2967 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2968 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2969 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2970 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2972 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2973 for (i=0;i<pcis->n;i++) { 2974 if (array[i] == 1.0) { 2975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 2976 } 2977 } 2978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2979 for (i=0;i<pcis->n;i++) { 2980 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2981 } 2982 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2983 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2984 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2985 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2986 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 2988 if (pcbddc->dbg_flag > 1) { 2989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2992 for (i=0;i<pcbddc->local_primal_size;i++) { 2993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 2994 } 2995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2996 } 2997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2998 } 2999 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3000 /* get back data */ 3001 *coarse_size_n = coarse_size; 3002 *local_primal_indices_n = local_primal_indices; 3003 PetscFunctionReturn(0); 3004 } 3005 3006