1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 126 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 127 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 128 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 129 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 130 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 131 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 132 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 133 if (n_constraints) { 134 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 135 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 136 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 137 } 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 144 { 145 PetscErrorCode ierr; 146 /* pointers to pcis and pcbddc */ 147 PC_IS* pcis = (PC_IS*)pc->data; 148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 149 /* submatrices of local problem */ 150 Mat A_RV,A_VR,A_VV; 151 /* working matrices */ 152 Mat M1,M2,M3,C_CR; 153 /* working vectors */ 154 Vec vec1_C,vec2_C,vec1_V,vec2_V; 155 /* additional working stuff */ 156 IS is_aux; 157 ISLocalToGlobalMapping BtoNmap; 158 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 159 const PetscScalar *array,*row_cmat_values; 160 const PetscInt *row_cmat_indices,*idx_R_local; 161 PetscInt *vertices,*idx_V_B,*auxindices; 162 PetscInt n_vertices,n_constraints,size_of_constraint; 163 PetscInt i,j,n_R,n_D,n_B; 164 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 165 /* Vector and matrix types */ 166 VecType impVecType; 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 184 185 /* Allocating some extra storage just to be safe */ 186 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 187 for (i=0;i<pcis->n;i++) auxindices[i]=i; 188 189 /* vertices in boundary numbering */ 190 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 191 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 192 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 194 if (i != n_vertices) { 195 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 196 } 197 198 /* some work vectors on vertices and/or constraints */ 199 if (n_vertices) { 200 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 201 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 202 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 203 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 204 } 205 if (n_constraints) { 206 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 208 } 209 210 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 211 if (n_constraints) { 212 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 213 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 214 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 215 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 216 217 /* Extract constraints on R nodes: C_{CR} */ 218 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 220 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 221 222 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 223 for (i=0;i<n_constraints;i++) { 224 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 225 /* Get row of constraint matrix in R numbering */ 226 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 227 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 229 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 230 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 231 /* Solve for row of constraint matrix in R numbering */ 232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 233 /* Set values in local_auxmat2 */ 234 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 235 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 237 } 238 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 241 242 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 243 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 244 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 245 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 246 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 247 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(M1);CHKERRQ(ierr); 249 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 250 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 251 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 252 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 253 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 254 ierr = MatDestroy(&M2);CHKERRQ(ierr); 255 ierr = MatDestroy(&M3);CHKERRQ(ierr); 256 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 257 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 258 } 259 260 /* Get submatrices from subdomain matrix */ 261 if (n_vertices) { 262 PetscInt ibs,iibs,mbs; 263 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 264 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 265 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 266 ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr); 267 if (ibs != mbs || iibs != mbs) { /* TODO BAIJ */ 268 Mat newmat; 269 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 270 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 271 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 273 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 274 } else { 275 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 277 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 278 } 279 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 280 } 281 282 /* Matrix of coarse basis functions (local) */ 283 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 284 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 285 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 286 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 287 if (pcbddc->switch_static || pcbddc->dbg_flag) { 288 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 289 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 290 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 291 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 292 } 293 294 if (pcbddc->dbg_flag) { 295 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 296 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 297 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 298 } 299 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 300 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 301 302 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 303 304 /* vertices */ 305 for (i=0;i<n_vertices;i++) { 306 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 307 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 308 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 309 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 310 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 311 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 312 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 313 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 314 if (n_constraints) { 315 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 316 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 317 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 318 } 319 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 320 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 321 322 /* Set values in coarse basis function and subdomain part of coarse_mat */ 323 /* coarse basis functions */ 324 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 325 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 326 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 327 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 328 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 329 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 330 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 331 if (pcbddc->switch_static || pcbddc->dbg_flag) { 332 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 333 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 335 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 337 } 338 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 339 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 340 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 342 if (n_constraints) { 343 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 344 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 345 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 346 } 347 348 /* check */ 349 if (pcbddc->dbg_flag) { 350 /* assemble subdomain vector on local nodes */ 351 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 352 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 353 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 354 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 355 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 356 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 357 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 358 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 359 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 363 if (n_constraints) { 364 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 365 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 367 } 368 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 369 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 370 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 371 /* check saddle point solution */ 372 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 373 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 374 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 375 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 376 /* shift by the identity matrix */ 377 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 380 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 381 } 382 } 383 384 /* constraints */ 385 for (i=0;i<n_constraints;i++) { 386 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 387 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 388 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 389 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 390 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 391 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 392 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 393 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 394 if (n_vertices) { 395 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 396 } 397 /* Set values in coarse basis function and subdomain part of coarse_mat */ 398 /* coarse basis functions */ 399 j = i+n_vertices; /* don't touch this! */ 400 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 401 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 406 if (pcbddc->switch_static || pcbddc->dbg_flag) { 407 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 412 } 413 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 414 if (n_vertices) { 415 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 416 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 418 } 419 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 420 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 422 423 if (pcbddc->dbg_flag) { 424 /* assemble subdomain vector on nodes */ 425 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 426 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 if (n_vertices) { 434 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 437 } 438 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 439 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 454 } 455 } 456 /* call assembling routines for local coarse basis */ 457 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 if (pcbddc->switch_static || pcbddc->dbg_flag) { 460 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 461 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 462 } 463 464 /* compute other basis functions for non-symmetric problems */ 465 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 466 if (!setsym || (setsym && !issym)) { 467 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 468 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 470 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 472 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 473 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 474 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 475 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 476 } 477 for (i=0;i<pcbddc->local_primal_size;i++) { 478 if (n_constraints) { 479 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 480 for (j=0;j<n_constraints;j++) { 481 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 482 } 483 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 484 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 485 } 486 if (i<n_vertices) { 487 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 488 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 489 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 490 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 491 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 492 if (n_constraints) { 493 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 494 } 495 } else { 496 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 497 } 498 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 499 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 500 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 502 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 503 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 505 if (i<n_vertices) { 506 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 507 } 508 if (pcbddc->switch_static || pcbddc->dbg_flag) { 509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 511 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 512 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 514 } 515 516 if (pcbddc->dbg_flag) { 517 /* assemble subdomain vector on nodes */ 518 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 520 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 522 if (i<n_vertices) { 523 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 524 } 525 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 526 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 527 /* assemble subdomain vector of lagrange multipliers */ 528 for (j=0;j<pcbddc->local_primal_size;j++) { 529 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 530 } 531 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 532 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 533 /* check saddle point solution */ 534 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 535 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 536 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 537 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 538 /* shift by the identity matrix */ 539 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 540 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 541 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 543 } 544 } 545 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 if (pcbddc->switch_static || pcbddc->dbg_flag) { 548 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 } 551 } 552 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 553 /* Checking coarse_sub_mat and coarse basis functios */ 554 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 555 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 556 if (pcbddc->dbg_flag) { 557 Mat coarse_sub_mat; 558 Mat AUXMAT,TM1,TM2,TM3,TM4; 559 Mat coarse_phi_D,coarse_phi_B; 560 Mat coarse_psi_D,coarse_psi_B; 561 Mat A_II,A_BB,A_IB,A_BI; 562 MatType checkmattype=MATSEQAIJ; 563 PetscReal real_value; 564 565 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 566 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 567 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 568 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 569 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 570 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 571 if (pcbddc->coarse_psi_B) { 572 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 573 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 574 } 575 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 576 577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 580 if (pcbddc->coarse_psi_B) { 581 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 582 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 583 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 584 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 585 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 586 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 587 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 588 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 589 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 590 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 591 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 592 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 593 } else { 594 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 595 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 596 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 597 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 598 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 599 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 600 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 601 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 602 } 603 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 604 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 605 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 606 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 607 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 608 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 613 for (i=0;i<pcbddc->local_primal_size;i++) { 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 617 for (i=0;i<pcbddc->local_primal_size;i++) { 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 619 } 620 if (pcbddc->coarse_psi_B) { 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 622 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 624 } 625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 626 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 628 } 629 } 630 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 631 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 632 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 633 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 634 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 635 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 636 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 637 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 638 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 639 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 640 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 641 if (pcbddc->coarse_psi_B) { 642 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 643 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 644 } 645 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 646 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 647 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 648 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 649 } 650 /* free memory */ 651 if (n_vertices) { 652 ierr = PetscFree(vertices);CHKERRQ(ierr); 653 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 654 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 655 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 656 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 657 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 658 } 659 if (n_constraints) { 660 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 661 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 662 ierr = MatDestroy(&M1);CHKERRQ(ierr); 663 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 664 } 665 ierr = PetscFree(auxindices);CHKERRQ(ierr); 666 /* get back data */ 667 *coarse_submat_vals_n = coarse_submat_vals; 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 673 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 674 { 675 PC_IS* pcis = (PC_IS*)(pc->data); 676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 677 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 678 /* manage repeated solves */ 679 MatReuse reuse; 680 MatStructure matstruct; 681 PetscErrorCode ierr; 682 683 PetscFunctionBegin; 684 /* get mat flags */ 685 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 686 reuse = MAT_INITIAL_MATRIX; 687 if (pc->setupcalled) { 688 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 689 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 690 if (matstruct == SAME_NONZERO_PATTERN) { 691 reuse = MAT_REUSE_MATRIX; 692 } else { 693 reuse = MAT_INITIAL_MATRIX; 694 } 695 } 696 if (reuse == MAT_INITIAL_MATRIX) { 697 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 701 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 702 } 703 704 /* transform local matrices if needed */ 705 if (pcbddc->use_change_of_basis) { 706 Mat change_mat_all; 707 PetscScalar *row_cmat_values; 708 PetscInt *row_cmat_indices; 709 PetscInt *nnz,*is_indices,*temp_indices; 710 PetscInt i,j,k,n_D,n_B; 711 712 /* Get Non-overlapping dimensions */ 713 n_B = pcis->n_B; 714 n_D = pcis->n-n_B; 715 716 /* compute nonzero structure of change of basis on all local nodes */ 717 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 718 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 719 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 720 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 721 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 722 k=1; 723 for (i=0;i<n_B;i++) { 724 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 725 nnz[is_indices[i]]=j; 726 if (k < j) k = j; 727 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 728 } 729 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 /* assemble change of basis matrix on the whole set of local dofs */ 731 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 732 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 733 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 734 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 735 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) { 738 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 for (i=0;i<n_B;i++) { 743 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 744 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 745 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 747 } 748 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 749 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 750 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 751 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 752 if (i==1) { 753 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 754 } else { 755 Mat work_mat; 756 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 757 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 758 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 759 } 760 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 761 ierr = PetscFree(nnz);CHKERRQ(ierr); 762 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 763 } else { 764 /* without change of basis, the local matrix is unchanged */ 765 if (!pcbddc->local_mat) { 766 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 767 pcbddc->local_mat = matis->A; 768 } 769 } 770 771 /* get submatrices */ 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 774 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 775 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 781 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 782 { 783 PC_IS* pcis = (PC_IS*)(pc->data); 784 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 785 IS is_aux1,is_aux2; 786 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 787 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 788 PetscBT bitmask; 789 PetscErrorCode ierr; 790 791 PetscFunctionBegin; 792 /* Set Non-overlapping dimensions */ 793 n_B = pcis->n_B; n_D = pcis->n - n_B; 794 /* get vertex indices from constraint matrix */ 795 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 796 /* Set number of constraints */ 797 n_constraints = pcbddc->local_primal_size-n_vertices; 798 /* create auxiliary bitmask */ 799 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 800 for (i=0;i<n_vertices;i++) { 801 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 802 } 803 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 804 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 805 for (i=0, n_R=0; i<pcis->n; i++) { 806 if (!PetscBTLookup(bitmask,i)) { 807 idx_R_local[n_R] = i; 808 n_R++; 809 } 810 } 811 ierr = PetscFree(vertices);CHKERRQ(ierr); 812 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 813 814 /* print some info if requested */ 815 if (pcbddc->dbg_flag) { 816 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 822 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 823 } 824 825 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 826 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 827 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 828 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 829 for (i=0; i<n_D; i++) { 830 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 831 } 832 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 833 for (i=0, j=0; i<n_R; i++) { 834 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 835 aux_array1[j++] = i; 836 } 837 } 838 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 839 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 840 for (i=0, j=0; i<n_B; i++) { 841 if (!PetscBTLookup(bitmask,is_indices[i])) { 842 aux_array2[j++] = i; 843 } 844 } 845 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 846 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 847 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 848 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 849 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 850 851 if (pcbddc->switch_static || pcbddc->dbg_flag) { 852 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 853 for (i=0, j=0; i<n_R; i++) { 854 if (PetscBTLookup(bitmask,idx_R_local[i])) { 855 aux_array1[j++] = i; 856 } 857 } 858 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 859 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 860 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 861 } 862 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 869 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 870 { 871 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 872 PC_IS *pcis = (PC_IS*)pc->data; 873 PC pc_temp; 874 Mat A_RR; 875 Vec vec1,vec2,vec3; 876 MatStructure matstruct; 877 PetscScalar m_one = -1.0; 878 PetscReal value; 879 PetscInt n_D,n_R,ibs,mbs; 880 PetscBool use_exact,use_exact_reduced; 881 PetscErrorCode ierr; 882 /* prefixes stuff */ 883 char dir_prefix[256],neu_prefix[256],str_level[3]; 884 size_t len; 885 886 PetscFunctionBegin; 887 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 888 889 /* compute prefixes */ 890 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 891 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 892 if (!pcbddc->current_level) { 893 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 894 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 895 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 896 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 897 } else { 898 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 899 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 900 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 901 len -= 15; /* remove "pc_bddc_coarse_" */ 902 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 903 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 904 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 905 *(dir_prefix+len)='\0'; 906 *(neu_prefix+len)='\0'; 907 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 908 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 909 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 910 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 911 } 912 913 /* DIRICHLET PROBLEM */ 914 /* Matrix for Dirichlet problem is pcis->A_II */ 915 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 916 if (!pcbddc->ksp_D) { /* create object if not yet build */ 917 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 918 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 919 /* default */ 920 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 921 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 922 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 923 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 924 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 925 } 926 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 927 /* Allow user's customization */ 928 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 929 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 930 if (!n_D) { 931 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 932 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 933 } 934 /* Set Up KSP for Dirichlet problem of BDDC */ 935 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 936 /* set ksp_D into pcis data */ 937 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 938 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 939 pcis->ksp_D = pcbddc->ksp_D; 940 941 /* NEUMANN PROBLEM */ 942 /* Matrix for Neumann problem is A_RR -> we need to create it */ 943 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 944 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 945 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 946 if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */ 947 Mat newmat; 948 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 949 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 950 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 951 } else { 952 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 953 } 954 if (!pcbddc->ksp_R) { /* create object if not yet build */ 955 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 956 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 957 /* default */ 958 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 959 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 960 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 961 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 962 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 963 } 964 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 965 /* Allow user's customization */ 966 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 967 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 968 if (!n_R) { 969 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 970 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 971 } 972 /* Set Up KSP for Neumann problem of BDDC */ 973 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 974 975 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 976 977 /* Dirichlet */ 978 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 979 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 980 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 981 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 982 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 983 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 984 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 985 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 986 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 987 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 988 /* need to be adapted? */ 989 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 990 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 991 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 992 /* print info */ 993 if (pcbddc->dbg_flag) { 994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1001 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1002 } 1003 1004 /* Neumann */ 1005 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1006 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1007 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1008 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1009 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1010 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1011 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1012 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1013 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1014 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1015 /* need to be adapted? */ 1016 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1017 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1018 /* print info */ 1019 if (pcbddc->dbg_flag) { 1020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1021 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1022 } 1023 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1024 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1025 } 1026 /* free Neumann problem's matrix */ 1027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1033 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1034 { 1035 PetscErrorCode ierr; 1036 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1037 1038 PetscFunctionBegin; 1039 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1040 if (pcbddc->local_auxmat1) { 1041 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1042 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1049 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1050 { 1051 PetscErrorCode ierr; 1052 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1053 PC_IS* pcis = (PC_IS*) (pc->data); 1054 const PetscScalar zero = 0.0; 1055 1056 PetscFunctionBegin; 1057 /* Application of PHI^T (or PSI^T) */ 1058 if (pcbddc->coarse_psi_B) { 1059 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1060 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1061 } else { 1062 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1063 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1064 } 1065 /* Scatter data of coarse_rhs */ 1066 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1067 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1068 1069 /* Local solution on R nodes */ 1070 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1071 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1072 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 if (pcbddc->switch_static) { 1074 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 } 1077 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1078 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1079 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1080 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 if (pcbddc->switch_static) { 1082 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 } 1085 1086 /* Coarse solution */ 1087 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1089 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1090 } 1091 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1093 1094 /* Sum contributions from two levels */ 1095 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1096 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1102 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1103 { 1104 PetscErrorCode ierr; 1105 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1106 1107 PetscFunctionBegin; 1108 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1114 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1115 { 1116 PetscErrorCode ierr; 1117 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1118 1119 PetscFunctionBegin; 1120 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 /* uncomment for testing purposes */ 1125 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1128 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1129 { 1130 PetscErrorCode ierr; 1131 PC_IS* pcis = (PC_IS*)(pc->data); 1132 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1133 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1134 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1135 MatType impMatType=MATSEQAIJ; 1136 /* one and zero */ 1137 PetscScalar one=1.0,zero=0.0; 1138 /* space to store constraints and their local indices */ 1139 PetscScalar *temp_quadrature_constraint; 1140 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1141 /* iterators */ 1142 PetscInt i,j,k,total_counts,temp_start_ptr; 1143 /* stuff to store connected components stored in pcbddc->mat_graph */ 1144 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1145 PetscInt n_ISForFaces,n_ISForEdges; 1146 /* near null space stuff */ 1147 MatNullSpace nearnullsp; 1148 const Vec *nearnullvecs; 1149 Vec *localnearnullsp; 1150 PetscBool nnsp_has_cnst; 1151 PetscInt nnsp_size; 1152 PetscScalar *array; 1153 /* BLAS integers */ 1154 PetscBLASInt lwork,lierr; 1155 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1156 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1157 /* LAPACK working arrays for SVD or POD */ 1158 PetscBool skip_lapack; 1159 PetscScalar *work; 1160 PetscReal *singular_vals; 1161 #if defined(PETSC_USE_COMPLEX) 1162 PetscReal *rwork; 1163 #endif 1164 #if defined(PETSC_MISSING_LAPACK_GESVD) 1165 PetscBLASInt Blas_one_2=1; 1166 PetscScalar *temp_basis,*correlation_mat; 1167 #else 1168 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1169 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1170 #endif 1171 /* change of basis */ 1172 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1173 PetscBool boolforchange; 1174 PetscBT touched,change_basis; 1175 /* auxiliary stuff */ 1176 PetscInt *nnz,*is_indices,*local_to_B; 1177 /* some quantities */ 1178 PetscInt n_vertices,total_primal_vertices; 1179 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1180 1181 1182 PetscFunctionBegin; 1183 /* Get index sets for faces, edges and vertices from graph */ 1184 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1185 pcbddc->use_vertices = PETSC_TRUE; 1186 } 1187 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1188 /* HACK: provide functions to set change of basis */ 1189 if (!ISForVertices && pcbddc->NullSpace) { 1190 pcbddc->use_change_of_basis = PETSC_TRUE; 1191 pcbddc->use_change_on_faces = PETSC_FALSE; 1192 } 1193 /* print some info */ 1194 if (pcbddc->dbg_flag) { 1195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1196 i = 0; 1197 if (ISForVertices) { 1198 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1199 } 1200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1204 } 1205 /* check if near null space is attached to global mat */ 1206 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1207 if (nearnullsp) { 1208 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1209 } else { /* if near null space is not provided BDDC uses constants by default */ 1210 nnsp_size = 0; 1211 nnsp_has_cnst = PETSC_TRUE; 1212 } 1213 /* get max number of constraints on a single cc */ 1214 max_constraints = nnsp_size; 1215 if (nnsp_has_cnst) max_constraints++; 1216 1217 /* 1218 Evaluate maximum storage size needed by the procedure 1219 - temp_indices will contain start index of each constraint stored as follows 1220 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1221 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1222 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1223 */ 1224 total_counts = n_ISForFaces+n_ISForEdges; 1225 total_counts *= max_constraints; 1226 n_vertices = 0; 1227 if (ISForVertices) { 1228 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1229 } 1230 total_counts += n_vertices; 1231 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1232 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1233 total_counts = 0; 1234 max_size_of_constraint = 0; 1235 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1236 if (i<n_ISForEdges) { 1237 used_IS = &ISForEdges[i]; 1238 } else { 1239 used_IS = &ISForFaces[i-n_ISForEdges]; 1240 } 1241 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1242 total_counts += j; 1243 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1244 } 1245 total_counts *= max_constraints; 1246 total_counts += n_vertices; 1247 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1248 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1249 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1250 /* local to boundary numbering */ 1251 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1252 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1253 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1254 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1255 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1256 /* get local part of global near null space vectors */ 1257 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1258 for (k=0;k<nnsp_size;k++) { 1259 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 } 1263 1264 /* whether or not to skip lapack calls */ 1265 skip_lapack = PETSC_TRUE; 1266 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1267 1268 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1269 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1270 PetscScalar temp_work; 1271 #if defined(PETSC_MISSING_LAPACK_GESVD) 1272 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1273 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1274 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1275 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1276 #if defined(PETSC_USE_COMPLEX) 1277 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1278 #endif 1279 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1280 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1281 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1282 lwork = -1; 1283 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1284 #if !defined(PETSC_USE_COMPLEX) 1285 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1286 #else 1287 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1288 #endif 1289 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1290 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1291 #else /* on missing GESVD */ 1292 /* SVD */ 1293 PetscInt max_n,min_n; 1294 max_n = max_size_of_constraint; 1295 min_n = max_constraints; 1296 if (max_size_of_constraint < max_constraints) { 1297 min_n = max_size_of_constraint; 1298 max_n = max_constraints; 1299 } 1300 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1301 #if defined(PETSC_USE_COMPLEX) 1302 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1303 #endif 1304 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1305 lwork = -1; 1306 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1307 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1308 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1309 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1310 #if !defined(PETSC_USE_COMPLEX) 1311 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1312 #else 1313 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1314 #endif 1315 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1316 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1317 #endif /* on missing GESVD */ 1318 /* Allocate optimal workspace */ 1319 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1320 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1321 } 1322 /* Now we can loop on constraining sets */ 1323 total_counts = 0; 1324 temp_indices[0] = 0; 1325 /* vertices */ 1326 if (ISForVertices) { 1327 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1328 if (nnsp_has_cnst) { /* consider all vertices */ 1329 for (i=0;i<n_vertices;i++) { 1330 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1331 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1332 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1333 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1334 total_counts++; 1335 } 1336 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1337 PetscBool used_vertex; 1338 for (i=0;i<n_vertices;i++) { 1339 used_vertex = PETSC_FALSE; 1340 k = 0; 1341 while (!used_vertex && k<nnsp_size) { 1342 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1343 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1344 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1345 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1346 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1347 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1348 total_counts++; 1349 used_vertex = PETSC_TRUE; 1350 } 1351 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1352 k++; 1353 } 1354 } 1355 } 1356 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1357 n_vertices = total_counts; 1358 } 1359 1360 /* edges and faces */ 1361 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1362 if (i<n_ISForEdges) { 1363 used_IS = &ISForEdges[i]; 1364 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1365 } else { 1366 used_IS = &ISForFaces[i-n_ISForEdges]; 1367 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1368 } 1369 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1370 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1371 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1372 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1373 /* change of basis should not be performed on local periodic nodes */ 1374 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1375 if (nnsp_has_cnst) { 1376 PetscScalar quad_value; 1377 temp_constraints++; 1378 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1379 for (j=0;j<size_of_constraint;j++) { 1380 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1381 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1382 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1383 } 1384 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1385 if (boolforchange) { 1386 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1387 } 1388 total_counts++; 1389 } 1390 for (k=0;k<nnsp_size;k++) { 1391 PetscReal real_value; 1392 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1393 for (j=0;j<size_of_constraint;j++) { 1394 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1395 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1396 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1397 } 1398 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1399 /* check if array is null on the connected component */ 1400 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1401 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1402 if (real_value > 0.0) { /* keep indices and values */ 1403 temp_constraints++; 1404 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1405 if (boolforchange) { 1406 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1407 } 1408 total_counts++; 1409 } 1410 } 1411 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1412 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1413 if (!pcbddc->use_nnsp_true) { 1414 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1415 1416 #if defined(PETSC_MISSING_LAPACK_GESVD) 1417 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1418 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1419 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1420 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1421 from that computed using LAPACKgesvd 1422 -> This is due to a different computation of eigenvectors in LAPACKheev 1423 -> The quality of the POD-computed basis will be the same */ 1424 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1425 /* Store upper triangular part of correlation matrix */ 1426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1428 for (j=0;j<temp_constraints;j++) { 1429 for (k=0;k<j+1;k++) { 1430 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1431 } 1432 } 1433 /* compute eigenvalues and eigenvectors of correlation matrix */ 1434 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1435 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1436 #if !defined(PETSC_USE_COMPLEX) 1437 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1438 #else 1439 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1440 #endif 1441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1442 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1443 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1444 j=0; 1445 while (j < temp_constraints && singular_vals[j] < tol) j++; 1446 total_counts=total_counts-j; 1447 /* scale and copy POD basis into used quadrature memory */ 1448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1449 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1450 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1454 if (j<temp_constraints) { 1455 PetscInt ii; 1456 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1458 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1459 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1460 for (k=0;k<temp_constraints-j;k++) { 1461 for (ii=0;ii<size_of_constraint;ii++) { 1462 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1463 } 1464 } 1465 } 1466 #else /* on missing GESVD */ 1467 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1468 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1469 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1470 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1471 #if !defined(PETSC_USE_COMPLEX) 1472 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1473 #else 1474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1475 #endif 1476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1478 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1479 k = temp_constraints; 1480 if (k > size_of_constraint) k = size_of_constraint; 1481 j = 0; 1482 while (j < k && singular_vals[k-j-1] < tol) j++; 1483 total_counts = total_counts-temp_constraints+k-j; 1484 #endif /* on missing GESVD */ 1485 } 1486 } 1487 /* free index sets of faces, edges and vertices */ 1488 for (i=0;i<n_ISForFaces;i++) { 1489 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1490 } 1491 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1492 for (i=0;i<n_ISForEdges;i++) { 1493 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1494 } 1495 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1496 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1497 1498 /* free workspace */ 1499 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1500 ierr = PetscFree(work);CHKERRQ(ierr); 1501 #if defined(PETSC_USE_COMPLEX) 1502 ierr = PetscFree(rwork);CHKERRQ(ierr); 1503 #endif 1504 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1505 #if defined(PETSC_MISSING_LAPACK_GESVD) 1506 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1507 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1508 #endif 1509 } 1510 for (k=0;k<nnsp_size;k++) { 1511 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1512 } 1513 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1514 1515 /* set quantities in pcbddc data structure */ 1516 /* n_vertices defines the number of subdomain corners in the primal space */ 1517 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1518 pcbddc->local_primal_size = total_counts; 1519 pcbddc->n_vertices = n_vertices; 1520 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1521 1522 /* Create constraint matrix */ 1523 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1524 /* so we need to set it up properly either with or without change of basis */ 1525 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1526 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1527 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1528 /* array to compute a local numbering of constraints : vertices first then constraints */ 1529 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1530 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1531 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1532 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1533 /* auxiliary stuff for basis change */ 1534 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1535 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1536 1537 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1538 total_primal_vertices=0; 1539 for (i=0;i<pcbddc->local_primal_size;i++) { 1540 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1541 if (size_of_constraint == 1) { 1542 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1543 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1544 aux_primal_minloc[total_primal_vertices]=0; 1545 total_primal_vertices++; 1546 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1547 PetscInt min_loc,min_index; 1548 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1549 /* find first untouched local node */ 1550 k = 0; 1551 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1552 min_index = global_indices[k]; 1553 min_loc = k; 1554 /* search the minimum among global nodes already untouched on the cc */ 1555 for (k=1;k<size_of_constraint;k++) { 1556 /* there can be more than one constraint on a single connected component */ 1557 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1558 min_index = global_indices[k]; 1559 min_loc = k; 1560 } 1561 } 1562 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1563 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1564 aux_primal_minloc[total_primal_vertices]=min_loc; 1565 total_primal_vertices++; 1566 } 1567 } 1568 /* free workspace */ 1569 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1570 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1571 /* permute indices in order to have a sorted set of vertices */ 1572 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1573 1574 /* nonzero structure of constraint matrix */ 1575 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1576 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1577 j=total_primal_vertices; 1578 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1579 if (!PetscBTLookup(change_basis,i)) { 1580 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1581 j++; 1582 } 1583 } 1584 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1585 ierr = PetscFree(nnz);CHKERRQ(ierr); 1586 /* set values in constraint matrix */ 1587 for (i=0;i<total_primal_vertices;i++) { 1588 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1589 } 1590 total_counts = total_primal_vertices; 1591 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1592 if (!PetscBTLookup(change_basis,i)) { 1593 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1594 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1595 total_counts++; 1596 } 1597 } 1598 /* assembling */ 1599 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1601 /* 1602 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1603 */ 1604 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1605 if (pcbddc->use_change_of_basis) { 1606 PetscBool qr_needed = PETSC_FALSE; 1607 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1608 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1609 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1610 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1611 /* work arrays */ 1612 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1613 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1614 /* nonzeros per row */ 1615 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1616 if (PetscBTLookup(change_basis,i)) { 1617 qr_needed = PETSC_TRUE; 1618 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1619 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1620 } 1621 } 1622 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1623 ierr = PetscFree(nnz);CHKERRQ(ierr); 1624 /* Set initial identity in the matrix */ 1625 for (i=0;i<pcis->n_B;i++) { 1626 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1627 } 1628 1629 /* Now we loop on the constraints which need a change of basis */ 1630 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1631 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1632 /* Change of basis matrix T computed via QR decomposition of constraints */ 1633 if (qr_needed) { 1634 /* dual and primal dofs on a single cc */ 1635 PetscInt dual_dofs,primal_dofs; 1636 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1637 PetscInt primal_counter; 1638 /* working stuff for GEQRF */ 1639 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1640 PetscBLASInt lqr_work; 1641 /* working stuff for UNGQR */ 1642 PetscScalar *gqr_work,lgqr_work_t; 1643 PetscBLASInt lgqr_work; 1644 /* working stuff for TRTRS */ 1645 PetscScalar *trs_rhs; 1646 PetscBLASInt Blas_NRHS; 1647 /* pointers for values insertion into change of basis matrix */ 1648 PetscInt *start_rows,*start_cols; 1649 PetscScalar *start_vals; 1650 /* working stuff for values insertion */ 1651 PetscBT is_primal; 1652 1653 /* space to store Q */ 1654 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1655 /* first we issue queries for optimal work */ 1656 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1657 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1658 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1659 lqr_work = -1; 1660 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1661 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1662 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1663 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1664 lgqr_work = -1; 1665 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1666 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1667 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1668 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1669 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1670 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1671 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1672 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1673 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1674 /* array to store scaling factors for reflectors */ 1675 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1676 /* array to store rhs and solution of triangular solver */ 1677 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1678 /* array to store whether a node is primal or not */ 1679 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1680 for (i=0;i<total_primal_vertices;i++) { 1681 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1682 } 1683 1684 /* allocating workspace for check */ 1685 if (pcbddc->dbg_flag) { 1686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1688 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1689 } 1690 1691 /* loop on constraints and see whether or not they need a change of basis */ 1692 /* -> using implicit ordering contained in temp_indices data */ 1693 total_counts = pcbddc->n_vertices; 1694 primal_counter = total_counts; 1695 while (total_counts<pcbddc->local_primal_size) { 1696 primal_dofs = 1; 1697 if (PetscBTLookup(change_basis,total_counts)) { 1698 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1699 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1700 primal_dofs++; 1701 } 1702 /* get constraint info */ 1703 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1704 dual_dofs = size_of_constraint-primal_dofs; 1705 1706 /* copy quadrature constraints for change of basis check */ 1707 if (pcbddc->dbg_flag) { 1708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1709 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1710 } 1711 1712 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1713 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1714 1715 /* compute QR decomposition of constraints */ 1716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1717 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1720 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1721 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1722 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1723 1724 /* explictly compute R^-T */ 1725 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1726 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1727 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1728 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1731 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1732 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1733 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1734 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1735 1736 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1741 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1742 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1743 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1744 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1745 1746 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1747 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1748 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1750 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1751 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1752 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1753 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1755 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1756 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1757 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1758 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1759 1760 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1761 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1762 /* insert cols for primal dofs */ 1763 for (j=0;j<primal_dofs;j++) { 1764 start_vals = &qr_basis[j*size_of_constraint]; 1765 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1766 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1767 } 1768 /* insert cols for dual dofs */ 1769 for (j=0,k=0;j<dual_dofs;k++) { 1770 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1771 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1772 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1773 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1774 j++; 1775 } 1776 } 1777 1778 /* check change of basis */ 1779 if (pcbddc->dbg_flag) { 1780 PetscInt ii,jj; 1781 PetscBool valid_qr=PETSC_TRUE; 1782 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1784 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1785 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1786 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1787 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1788 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1789 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1791 for (jj=0;jj<size_of_constraint;jj++) { 1792 for (ii=0;ii<primal_dofs;ii++) { 1793 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1794 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1795 } 1796 } 1797 if (!valid_qr) { 1798 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1799 for (jj=0;jj<size_of_constraint;jj++) { 1800 for (ii=0;ii<primal_dofs;ii++) { 1801 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1802 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1803 } 1804 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1805 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1806 } 1807 } 1808 } 1809 } else { 1810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1811 } 1812 } 1813 /* increment primal counter */ 1814 primal_counter += primal_dofs; 1815 } else { 1816 if (pcbddc->dbg_flag) { 1817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1818 } 1819 } 1820 /* increment constraint counter total_counts */ 1821 total_counts += primal_dofs; 1822 } 1823 if (pcbddc->dbg_flag) { 1824 ierr = PetscFree(work);CHKERRQ(ierr); 1825 } 1826 /* free workspace */ 1827 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1828 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1829 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1830 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1831 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1832 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1833 } 1834 /* assembling */ 1835 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1836 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1837 /* 1838 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1839 */ 1840 } 1841 if (pcbddc->dbg_flag) { 1842 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1843 } 1844 /* free workspace */ 1845 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1846 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1848 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1849 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1850 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1851 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1852 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1858 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1859 { 1860 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1861 PC_IS *pcis = (PC_IS*)pc->data; 1862 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1863 PetscInt bs,ierr,i,vertex_size; 1864 PetscViewer viewer=pcbddc->dbg_viewer; 1865 1866 PetscFunctionBegin; 1867 /* Init local Graph struct */ 1868 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1869 1870 /* Check validity of the csr graph passed in by the user */ 1871 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1872 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1873 } 1874 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1875 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1876 Mat mat_adj; 1877 const PetscInt *xadj,*adjncy; 1878 PetscBool flg_row=PETSC_TRUE; 1879 1880 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1881 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1882 if (!flg_row) { 1883 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1884 } 1885 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1886 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1887 if (!flg_row) { 1888 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1889 } 1890 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1891 } 1892 1893 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1894 vertex_size = 1; 1895 if (!pcbddc->n_ISForDofs) { 1896 IS *custom_ISForDofs; 1897 1898 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1899 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1900 for (i=0;i<bs;i++) { 1901 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1902 } 1903 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1904 /* remove my references to IS objects */ 1905 for (i=0;i<bs;i++) { 1906 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1907 } 1908 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1909 } else { /* mat block size as vertex size (used for elasticity) */ 1910 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1911 } 1912 1913 /* Setup of Graph */ 1914 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1915 1916 /* Graph's connected components analysis */ 1917 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1918 1919 /* print some info to stdout */ 1920 if (pcbddc->dbg_flag) { 1921 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1928 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1929 { 1930 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1931 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 n = 0; 1936 vertices = 0; 1937 if (pcbddc->ConstraintMatrix) { 1938 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1939 for (i=0;i<local_primal_size;i++) { 1940 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1941 if (size_of_constraint == 1) n++; 1942 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1943 } 1944 if (vertices_idx) { 1945 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1946 n = 0; 1947 for (i=0;i<local_primal_size;i++) { 1948 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1949 if (size_of_constraint == 1) { 1950 vertices[n++]=row_cmat_indices[0]; 1951 } 1952 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1953 } 1954 } 1955 } 1956 *n_vertices = n; 1957 if (vertices_idx) *vertices_idx = vertices; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1963 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1964 { 1965 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1966 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1967 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1968 PetscBT touched; 1969 PetscErrorCode ierr; 1970 1971 /* This function assumes that the number of local constraints per connected component 1972 is not greater than the number of nodes defined for the connected component 1973 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1974 PetscFunctionBegin; 1975 n = 0; 1976 constraints_index = 0; 1977 if (pcbddc->ConstraintMatrix) { 1978 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1979 max_size_of_constraint = 0; 1980 for (i=0;i<local_primal_size;i++) { 1981 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1982 if (size_of_constraint > 1) { 1983 n++; 1984 } 1985 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1986 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1987 } 1988 if (constraints_idx) { 1989 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1990 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1991 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1992 n = 0; 1993 for (i=0;i<local_primal_size;i++) { 1994 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1995 if (size_of_constraint > 1) { 1996 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1997 /* find first untouched local node */ 1998 j = 0; 1999 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2000 min_index = row_cmat_global_indices[j]; 2001 min_loc = j; 2002 /* search the minimum among nodes not yet touched on the connected component 2003 since there can be more than one constraint on a single cc */ 2004 for (j=1;j<size_of_constraint;j++) { 2005 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2006 min_index = row_cmat_global_indices[j]; 2007 min_loc = j; 2008 } 2009 } 2010 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2011 constraints_index[n++] = row_cmat_indices[min_loc]; 2012 } 2013 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2014 } 2015 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2016 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2017 } 2018 } 2019 *n_constraints = n; 2020 if (constraints_idx) *constraints_idx = constraints_index; 2021 PetscFunctionReturn(0); 2022 } 2023 2024 /* the next two functions has been adapted from pcis.c */ 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PCBDDCApplySchur" 2027 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2028 { 2029 PetscErrorCode ierr; 2030 PC_IS *pcis = (PC_IS*)(pc->data); 2031 2032 PetscFunctionBegin; 2033 if (!vec2_B) { vec2_B = v; } 2034 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2035 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2036 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2037 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2038 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2044 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2045 { 2046 PetscErrorCode ierr; 2047 PC_IS *pcis = (PC_IS*)(pc->data); 2048 2049 PetscFunctionBegin; 2050 if (!vec2_B) { vec2_B = v; } 2051 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2052 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2053 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2054 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2055 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "PCBDDCSubsetNumbering" 2061 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2062 { 2063 Vec local_vec,global_vec; 2064 IS seqis,paris; 2065 VecScatter scatter_ctx; 2066 PetscScalar *array; 2067 PetscInt *temp_global_dofs; 2068 PetscScalar globalsum; 2069 PetscInt i,j,s; 2070 PetscInt nlocals,first_index,old_index,max_local; 2071 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2072 PetscMPIInt *dof_sizes,*dof_displs; 2073 PetscBool first_found; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 /* mpi buffers */ 2078 MPI_Comm_size(comm,&size_prec_comm); 2079 MPI_Comm_rank(comm,&rank_prec_comm); 2080 j = ( !rank_prec_comm ? size_prec_comm : 0); 2081 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2082 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2083 /* get maximum size of subset */ 2084 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2085 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2086 max_local = 0; 2087 if (n_local_dofs) { 2088 max_local = temp_global_dofs[0]; 2089 for (i=1;i<n_local_dofs;i++) { 2090 if (max_local < temp_global_dofs[i] ) { 2091 max_local = temp_global_dofs[i]; 2092 } 2093 } 2094 } 2095 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2096 max_global++; 2097 max_local = 0; 2098 if (n_local_dofs) { 2099 max_local = local_dofs[0]; 2100 for (i=1;i<n_local_dofs;i++) { 2101 if (max_local < local_dofs[i] ) { 2102 max_local = local_dofs[i]; 2103 } 2104 } 2105 } 2106 max_local++; 2107 /* allocate workspace */ 2108 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2109 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2110 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2111 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2112 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2113 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2114 /* create scatter */ 2115 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2116 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2117 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2118 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2119 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2120 /* init array */ 2121 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2122 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2123 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2124 if (local_dofs_mult) { 2125 for (i=0;i<n_local_dofs;i++) { 2126 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2127 } 2128 } else { 2129 for (i=0;i<n_local_dofs;i++) { 2130 array[local_dofs[i]]=1.0; 2131 } 2132 } 2133 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2134 /* scatter into global vec and get total number of global dofs */ 2135 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2137 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2138 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2139 /* Fill global_vec with cumulative function for global numbering */ 2140 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2141 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2142 nlocals = 0; 2143 first_index = -1; 2144 first_found = PETSC_FALSE; 2145 for (i=0;i<s;i++) { 2146 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2147 first_found = PETSC_TRUE; 2148 first_index = i; 2149 } 2150 nlocals += (PetscInt)PetscRealPart(array[i]); 2151 } 2152 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2153 if (!rank_prec_comm) { 2154 dof_displs[0]=0; 2155 for (i=1;i<size_prec_comm;i++) { 2156 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2157 } 2158 } 2159 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2160 if (first_found) { 2161 array[first_index] += (PetscScalar)nlocals; 2162 old_index = first_index; 2163 for (i=first_index+1;i<s;i++) { 2164 if (PetscRealPart(array[i]) > 0.0) { 2165 array[i] += array[old_index]; 2166 old_index = i; 2167 } 2168 } 2169 } 2170 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2171 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2172 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2173 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2174 /* get global ordering of local dofs */ 2175 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2176 if (local_dofs_mult) { 2177 for (i=0;i<n_local_dofs;i++) { 2178 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2179 } 2180 } else { 2181 for (i=0;i<n_local_dofs;i++) { 2182 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2183 } 2184 } 2185 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2186 /* free workspace */ 2187 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2188 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2189 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2190 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2191 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2192 /* return pointer to global ordering of local dofs */ 2193 *global_numbering_subset = temp_global_dofs; 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2199 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2200 { 2201 PetscInt i,j; 2202 PetscScalar *alphas; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 /* this implements stabilized Gram-Schmidt */ 2207 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2208 for (i=0;i<n;i++) { 2209 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2210 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2211 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2212 } 2213 ierr = PetscFree(alphas);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "MatConvert_IS_AIJ" 2220 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2221 { 2222 Mat new_mat; 2223 Mat_IS *matis = (Mat_IS*)(mat->data); 2224 /* info on mat */ 2225 PetscInt bs,rows,cols; 2226 PetscInt lrows,lcols; 2227 PetscInt local_rows,local_cols; 2228 PetscMPIInt nsubdomains; 2229 /* preallocation */ 2230 Vec vec_dnz,vec_onz; 2231 PetscScalar *my_dnz,*my_onz,*array; 2232 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2233 PetscInt index_row,index_col,owner; 2234 PetscInt *local_indices,*global_indices; 2235 /* work */ 2236 PetscInt i,j; 2237 PetscErrorCode ierr; 2238 2239 PetscFunctionBegin; 2240 /* CHECKS */ 2241 /* get info from mat */ 2242 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2243 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2244 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2245 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2246 2247 /* MAT_INITIAL_MATRIX */ 2248 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2249 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2250 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2251 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2252 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2253 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2254 2255 /* 2256 preallocation 2257 */ 2258 2259 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2260 /* 2261 Some vectors are needed to sum up properly on shared interface dofs. 2262 Note that preallocation is not exact, since it overestimates nonzeros 2263 */ 2264 /* 2265 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2266 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2267 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2268 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2269 */ 2270 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2271 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2272 /* All processes need to compute entire row ownership */ 2273 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2274 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2275 for (i=0;i<nsubdomains;i++) { 2276 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2277 row_ownership[j]=i; 2278 } 2279 } 2280 /* map indices of local mat to global values */ 2281 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2282 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2283 for (i=0;i<local_rows;i++) local_indices[i]=i; 2284 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2285 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2286 2287 /* 2288 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2289 then, they will be summed up properly. This way, preallocation is always sufficient 2290 */ 2291 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2292 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2293 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2294 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2295 for (i=0;i<local_rows;i++) { 2296 index_row = global_indices[i]; 2297 for (j=i;j<local_rows;j++) { 2298 owner = row_ownership[index_row]; 2299 index_col = global_indices[j]; 2300 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2301 my_dnz[i] += 1.0; 2302 } else { /* offdiag block */ 2303 my_onz[i] += 1.0; 2304 } 2305 /* same as before, interchanging rows and cols */ 2306 if (i != j) { 2307 owner = row_ownership[index_col]; 2308 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2309 my_dnz[j] += 1.0; 2310 } else { 2311 my_onz[j] += 1.0; 2312 } 2313 } 2314 } 2315 } 2316 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2317 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2318 if (local_rows) { /* multilevel guard */ 2319 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2320 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2321 } 2322 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2323 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2324 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2325 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2326 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2327 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2328 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2329 2330 /* set computed preallocation in dnz and onz */ 2331 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2332 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2333 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2334 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2335 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2336 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2337 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2338 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2339 2340 /* Resize preallocation if overestimated */ 2341 for (i=0;i<lrows;i++) { 2342 dnz[i] = PetscMin(dnz[i],lcols); 2343 onz[i] = PetscMin(onz[i],cols-lcols); 2344 } 2345 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2346 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2347 2348 /* 2349 Set values. Very Basic. 2350 */ 2351 for (i=0;i<local_rows;i++) { 2352 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2353 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2354 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2355 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2356 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2357 } 2358 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2359 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 /* get back mat */ 2363 *M = new_mat; 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatISSubassemble_Private" 2369 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2370 { 2371 Mat subdomain_adj; 2372 IS new_ranks,ranks_send_to; 2373 MatPartitioning partitioner; 2374 Mat_IS *matis; 2375 PetscInt n_neighs,*neighs,*n_shared,**shared; 2376 PetscInt prank; 2377 PetscMPIInt size,rank,color; 2378 PetscInt *xadj,*adjncy,*oldranks; 2379 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2380 PetscInt i,j,n_subdomains,local_size,threshold=0; 2381 PetscErrorCode ierr; 2382 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2383 PetscSubcomm subcomm; 2384 2385 PetscFunctionBegin; 2386 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2387 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2388 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2389 2390 /* Get info on mapping */ 2391 matis = (Mat_IS*)(mat->data); 2392 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2393 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2394 2395 /* build local CSR graph of subdomains' connectivity */ 2396 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2397 xadj[0] = 0; 2398 xadj[1] = PetscMax(n_neighs-1,0); 2399 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2400 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2401 2402 if (threshold) { 2403 PetscInt* count,min_threshold; 2404 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2405 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2406 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2407 for (j=0;j<n_shared[i];j++) { 2408 count[shared[i][j]] += 1; 2409 } 2410 } 2411 /* adapt threshold since we dont want to lose significant connections */ 2412 min_threshold = n_neighs; 2413 for (i=1;i<n_neighs;i++) { 2414 for (j=0;j<n_shared[i];j++) { 2415 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2416 } 2417 } 2418 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2419 threshold = PetscMax(min_threshold+1,threshold); 2420 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2421 xadj[1] = 0; 2422 for (i=1;i<n_neighs;i++) { 2423 for (j=0;j<n_shared[i];j++) { 2424 if (count[shared[i][j]] < threshold) { 2425 adjncy[xadj[1]] = neighs[i]; 2426 adjncy_wgt[xadj[1]] = n_shared[i]; 2427 xadj[1]++; 2428 break; 2429 } 2430 } 2431 } 2432 ierr = PetscFree(count);CHKERRQ(ierr); 2433 } else { 2434 if (xadj[1]) { 2435 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2436 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2437 } 2438 } 2439 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2440 if (use_square) { 2441 for (i=0;i<xadj[1];i++) { 2442 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2443 } 2444 } 2445 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2446 2447 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2448 2449 /* 2450 Restrict work on active processes only. 2451 */ 2452 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2453 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2454 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2455 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2456 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2457 if (color) { 2458 ierr = PetscFree(xadj);CHKERRQ(ierr); 2459 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2460 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2461 } else { 2462 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2463 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2464 prank = rank; 2465 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2466 for (i=0;i<size;i++) { 2467 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2468 } 2469 for (i=0;i<xadj[1];i++) { 2470 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2471 } 2472 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2473 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2474 n_subdomains = (PetscInt)size; 2475 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2476 2477 /* Partition */ 2478 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2479 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2480 if (use_vwgt) { 2481 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2482 v_wgt[0] = local_size; 2483 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2484 } 2485 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2486 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2487 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2488 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2489 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2490 2491 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2492 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2493 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2494 /* clean up */ 2495 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2496 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2497 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2498 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2499 } 2500 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2501 2502 /* assemble parallel IS for sends */ 2503 i = 1; 2504 if (color) i=0; 2505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2506 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2507 2508 /* get back IS */ 2509 *is_sends = ranks_send_to; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatISSubassemble" 2517 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2518 { 2519 Mat local_mat,new_mat; 2520 Mat_IS *matis; 2521 IS is_sends_internal; 2522 PetscInt rows,cols; 2523 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2524 PetscBool ismatis,isdense; 2525 ISLocalToGlobalMapping l2gmap; 2526 PetscInt* l2gmap_indices; 2527 const PetscInt* is_indices; 2528 MatType new_local_type; 2529 MatTypePrivate new_local_type_private; 2530 /* buffers */ 2531 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2532 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2533 /* MPI */ 2534 MPI_Comm comm; 2535 PetscMPIInt n_sends,n_recvs,commsize; 2536 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2537 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2538 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2539 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 /* checks */ 2544 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2545 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2546 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2547 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2548 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2549 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2550 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2551 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2552 PetscValidLogicalCollectiveInt(mat,bs,0); 2553 /* prepare IS for sending if not provided */ 2554 if (!is_sends) { 2555 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2556 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2557 } else { 2558 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2559 is_sends_internal = is_sends; 2560 } 2561 2562 /* get pointer of MATIS data */ 2563 matis = (Mat_IS*)mat->data; 2564 2565 /* get comm */ 2566 comm = PetscObjectComm((PetscObject)mat); 2567 2568 /* compute number of sends */ 2569 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2570 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2571 2572 /* compute number of receives */ 2573 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2574 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2575 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2576 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2577 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2578 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2579 ierr = PetscFree(iflags);CHKERRQ(ierr); 2580 2581 /* prepare send/receive buffers */ 2582 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2583 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2584 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2585 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2586 2587 /* Get data from local mat */ 2588 if (!isdense) { 2589 /* TODO: See below some guidelines on how to prepare the local buffers */ 2590 /* 2591 send_buffer_vals should contain the raw values of the local matrix 2592 send_buffer_idxs should contain: 2593 - MatType_PRIVATE type 2594 - PetscInt size_of_l2gmap 2595 - PetscInt global_row_indices[size_of_l2gmap] 2596 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2597 */ 2598 } else { 2599 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2600 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2601 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2602 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2603 send_buffer_idxs[1] = i; 2604 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2605 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2606 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2607 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2608 for (i=0;i<n_sends;i++) { 2609 ilengths_vals[is_indices[i]] = len*len; 2610 ilengths_idxs[is_indices[i]] = len+2; 2611 } 2612 } 2613 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2614 buf_size_idxs = 0; 2615 buf_size_vals = 0; 2616 for (i=0;i<n_recvs;i++) { 2617 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2618 buf_size_vals += (PetscInt)olengths_vals[i]; 2619 } 2620 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2621 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2622 2623 /* get new tags for clean communications */ 2624 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2625 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2626 2627 /* allocate for requests */ 2628 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2629 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2630 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2631 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2632 2633 /* communications */ 2634 ptr_idxs = recv_buffer_idxs; 2635 ptr_vals = recv_buffer_vals; 2636 for (i=0;i<n_recvs;i++) { 2637 source_dest = onodes[i]; 2638 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2639 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2640 ptr_idxs += olengths_idxs[i]; 2641 ptr_vals += olengths_vals[i]; 2642 } 2643 for (i=0;i<n_sends;i++) { 2644 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2645 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2646 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2647 } 2648 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2649 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2650 2651 /* assemble new l2g map */ 2652 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2653 ptr_idxs = recv_buffer_idxs; 2654 buf_size_idxs = 0; 2655 for (i=0;i<n_recvs;i++) { 2656 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2657 ptr_idxs += olengths_idxs[i]; 2658 } 2659 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2660 ptr_idxs = recv_buffer_idxs; 2661 buf_size_idxs = 0; 2662 for (i=0;i<n_recvs;i++) { 2663 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2664 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2665 ptr_idxs += olengths_idxs[i]; 2666 } 2667 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2668 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2669 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2670 2671 /* infer new local matrix type from received local matrices type */ 2672 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2673 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2674 new_local_type_private = MATAIJ_PRIVATE; 2675 new_local_type = MATSEQAIJ; 2676 if (n_recvs) { 2677 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2678 ptr_idxs = recv_buffer_idxs; 2679 for (i=0;i<n_recvs;i++) { 2680 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2681 new_local_type_private = MATAIJ_PRIVATE; 2682 break; 2683 } 2684 ptr_idxs += olengths_idxs[i]; 2685 } 2686 switch (new_local_type_private) { 2687 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2688 new_local_type = MATSEQAIJ; 2689 bs = 1; 2690 break; 2691 case MATAIJ_PRIVATE: 2692 new_local_type = MATSEQAIJ; 2693 bs = 1; 2694 break; 2695 case MATBAIJ_PRIVATE: 2696 new_local_type = MATSEQBAIJ; 2697 break; 2698 case MATSBAIJ_PRIVATE: 2699 new_local_type = MATSEQSBAIJ; 2700 break; 2701 default: 2702 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2703 break; 2704 } 2705 } 2706 2707 /* create MATIS object */ 2708 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2709 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2710 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2711 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2712 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2713 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2714 2715 /* set values */ 2716 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2717 ptr_vals = recv_buffer_vals; 2718 ptr_idxs = recv_buffer_idxs; 2719 for (i=0;i<n_recvs;i++) { 2720 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2721 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2722 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2723 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2724 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2725 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2726 } 2727 ptr_idxs += olengths_idxs[i]; 2728 ptr_vals += olengths_vals[i]; 2729 } 2730 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2733 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2734 2735 { /* check */ 2736 Vec lvec,rvec; 2737 PetscReal infty_error; 2738 2739 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2740 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2741 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2742 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2743 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2744 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2745 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2746 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2747 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2748 } 2749 2750 /* free workspace */ 2751 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2752 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2753 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2754 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2755 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2756 if (isdense) { 2757 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2758 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2759 } else { 2760 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2761 } 2762 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2763 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2764 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2765 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2766 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2767 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2768 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2769 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2770 ierr = PetscFree(onodes);CHKERRQ(ierr); 2771 /* get back new mat */ 2772 *mat_n = new_mat; 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2778 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2779 { 2780 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2781 PC_IS *pcis = (PC_IS*)pc->data; 2782 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2783 MatNullSpace CoarseNullSpace=NULL; 2784 ISLocalToGlobalMapping coarse_islg; 2785 IS coarse_is; 2786 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2787 PetscInt im_active=-1,active_procs=-1; 2788 PC pc_temp; 2789 PCType coarse_pc_type; 2790 KSPType coarse_ksp_type; 2791 PetscBool multilevel_requested,multilevel_allowed; 2792 PetscBool setsym,issym,isbddc,isnn; 2793 MatStructure matstruct; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 /* Assign global numbering to coarse dofs */ 2798 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2799 2800 /* infer some info from user */ 2801 issym = PETSC_FALSE; 2802 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2803 multilevel_allowed = PETSC_FALSE; 2804 multilevel_requested = PETSC_FALSE; 2805 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2806 if (multilevel_requested) { 2807 /* count "active processes" */ 2808 im_active = !!(pcis->n); 2809 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2810 if (active_procs/pcbddc->coarsening_ratio < 2) { 2811 multilevel_allowed = PETSC_FALSE; 2812 } else { 2813 multilevel_allowed = PETSC_TRUE; 2814 } 2815 } 2816 2817 /* set defaults for coarse KSP and PC */ 2818 if (multilevel_allowed) { 2819 if (issym) { 2820 coarse_ksp_type = KSPRICHARDSON; 2821 } else { 2822 coarse_ksp_type = KSPCHEBYSHEV; 2823 } 2824 coarse_pc_type = PCBDDC; 2825 } else { 2826 coarse_ksp_type = KSPPREONLY; 2827 coarse_pc_type = PCREDUNDANT; 2828 } 2829 2830 /* create the coarse KSP object only once with defaults */ 2831 if (!pcbddc->coarse_ksp) { 2832 char prefix[256],str_level[3]; 2833 size_t len; 2834 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2835 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2836 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2837 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2838 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2839 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2840 /* prefix */ 2841 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2842 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2843 if (!pcbddc->current_level) { 2844 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2845 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2846 } else { 2847 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2848 if (pcbddc->current_level>1) len -= 2; 2849 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2850 *(prefix+len)='\0'; 2851 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2852 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 2853 } 2854 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 2855 } 2856 /* allow user customization */ 2857 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2858 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2859 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2860 2861 /* get some info after set from options */ 2862 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2863 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2864 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2865 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 2866 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2867 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2868 isbddc = PETSC_FALSE; 2869 } 2870 2871 /* propagate BDDC info to the next level */ 2872 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2873 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2874 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2875 2876 /* creates temporary MATIS object for coarse matrix */ 2877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2878 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2880 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2881 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2882 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2885 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2886 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2887 2888 /* assemble coarse matrix */ 2889 if (isbddc || isnn) { 2890 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2891 } else { 2892 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2893 } 2894 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2895 2896 /* create local to global scatters for coarse problem */ 2897 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2898 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2899 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2900 2901 /* propagate symmetry info to coarse matrix */ 2902 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2903 2904 /* Compute coarse null space (special handling by BDDC only) */ 2905 if (pcbddc->NullSpace) { 2906 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2907 if (isbddc) { 2908 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2909 } else { 2910 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2911 } 2912 } 2913 2914 /* set operators */ 2915 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 2916 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 2917 2918 /* additional KSP customization */ 2919 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2920 if (max_it < 5) { 2921 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2922 } 2923 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 2924 2925 2926 /* print some info if requested */ 2927 if (pcbddc->dbg_flag) { 2928 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 2929 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 2930 if (!multilevel_allowed) { 2931 if (multilevel_requested) { 2932 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2933 } else if (pcbddc->max_levels) { 2934 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 2935 } 2936 } 2937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 2938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2939 } 2940 2941 /* setup coarse ksp */ 2942 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2943 if (pcbddc->dbg_flag) { 2944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 2945 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2946 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 2947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2948 } 2949 2950 /* Check coarse problem if requested */ 2951 if (pcbddc->dbg_flag) { 2952 KSP check_ksp; 2953 KSPType check_ksp_type; 2954 PC check_pc; 2955 Vec check_vec; 2956 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 2957 PetscInt its; 2958 PetscBool ispreonly,compute; 2959 2960 /* Create ksp object suitable for estimation of extreme eigenvalues */ 2961 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 2962 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2963 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 2964 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 2965 if (ispreonly) { 2966 check_ksp_type = KSPPREONLY; 2967 compute = PETSC_FALSE; 2968 } else { 2969 if (issym) check_ksp_type = KSPCG; 2970 else check_ksp_type = KSPGMRES; 2971 compute = PETSC_TRUE; 2972 } 2973 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 2974 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 2975 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 2976 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 2977 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 2978 /* create random vec */ 2979 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 2980 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 2981 if (CoarseNullSpace) { 2982 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 2983 } 2984 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2985 /* solve coarse problem */ 2986 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2987 if (CoarseNullSpace) { 2988 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 2989 } 2990 /* check coarse problem residual error */ 2991 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 2992 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2993 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2994 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 2995 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 2996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 2997 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 2998 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 2999 /* get eigenvalue estimation if preonly has not been requested */ 3000 if (!ispreonly) { 3001 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3002 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3004 } 3005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3007 } 3008 /* free memory */ 3009 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3010 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3016 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3017 { 3018 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3019 PC_IS* pcis = (PC_IS*)pc->data; 3020 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3021 PetscInt i,j,coarse_size; 3022 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 /* get indices in local ordering for vertices and constraints */ 3027 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3028 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3029 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3030 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3031 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3032 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3033 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3034 3035 /* Compute global number of coarse dofs */ 3036 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3037 3038 /* check numbering */ 3039 if (pcbddc->dbg_flag) { 3040 PetscScalar coarsesum,*array; 3041 3042 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3044 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3045 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3046 for (i=0;i<pcbddc->local_primal_size;i++) { 3047 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3048 } 3049 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3050 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3051 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3052 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3053 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3054 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3055 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3056 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3057 for (i=0;i<pcis->n;i++) { 3058 if (array[i] == 1.0) { 3059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3060 } 3061 } 3062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3063 for (i=0;i<pcis->n;i++) { 3064 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3065 } 3066 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3067 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3068 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3070 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3072 if (pcbddc->dbg_flag > 1) { 3073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3076 for (i=0;i<pcbddc->local_primal_size;i++) { 3077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3078 } 3079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3080 } 3081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3082 } 3083 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3084 /* get back data */ 3085 *coarse_size_n = coarse_size; 3086 *local_primal_indices_n = local_primal_indices; 3087 PetscFunctionReturn(0); 3088 } 3089 3090