1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 166 /* Vector and matrix types */ 167 VecType impVecType; 168 MatType impMatType; 169 /* some shortcuts to scalars */ 170 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 171 /* for debugging purposes */ 172 PetscReal *coarsefunctions_errors,*constraints_errors; 173 174 PetscFunctionBegin; 175 /* get number of vertices and their local indices */ 176 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 177 n_constraints = pcbddc->local_primal_size-n_vertices; 178 /* Set Non-overlapping dimensions */ 179 n_B = pcis->n_B; n_D = pcis->n - n_B; 180 n_R = pcis->n-n_vertices; 181 182 /* Set types for local objects needed by BDDC precondtioner */ 183 impMatType = MATSEQDENSE; 184 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 185 186 /* Allocating some extra storage just to be safe */ 187 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 188 for (i=0;i<pcis->n;i++) auxindices[i]=i; 189 190 /* vertices in boundary numbering */ 191 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 193 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 195 if (i != n_vertices) { 196 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 197 } 198 199 /* some work vectors on vertices and/or constraints */ 200 if (n_vertices) { 201 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 202 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 203 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 204 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 205 } 206 if (n_constraints) { 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 208 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 209 } 210 211 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 212 if (n_constraints) { 213 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 214 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 215 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 216 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 217 218 /* Extract constraints on R nodes: C_{CR} */ 219 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 220 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 221 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 222 223 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 224 for (i=0;i<n_constraints;i++) { 225 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 226 /* Get row of constraint matrix in R numbering */ 227 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 228 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 229 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 230 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 231 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 232 /* Solve for row of constraint matrix in R numbering */ 233 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 234 /* Set values in local_auxmat2 */ 235 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 236 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 238 } 239 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 241 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 242 243 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 244 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 245 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 246 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 247 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 248 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 249 ierr = MatSetUp(M1);CHKERRQ(ierr); 250 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 251 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 252 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 253 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 254 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 255 ierr = MatDestroy(&M2);CHKERRQ(ierr); 256 ierr = MatDestroy(&M3);CHKERRQ(ierr); 257 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 258 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 259 } 260 261 /* Get submatrices from subdomain matrix */ 262 if (n_vertices) { 263 /* PetscInt ibs,iibs,mbs; */ 264 PetscBool issbaij,isbaij; 265 266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 267 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 268 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 269 if (issbaij || isbaij) { 270 Mat newmat; 271 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 273 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 274 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 275 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 276 } else { 277 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 278 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 279 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 280 } 281 /* 282 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 283 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 284 ierr = ISGetBlockSize(is_aux,&iibs);CHKERRQ(ierr); 285 if (ibs != mbs || iibs != mbs) { 286 Mat newmat; 287 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 288 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 289 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 290 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 291 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 292 } else { 293 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 294 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 295 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 296 } 297 */ 298 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 299 } 300 301 /* Matrix of coarse basis functions (local) */ 302 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 303 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 304 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 305 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 306 if (pcbddc->switch_static || pcbddc->dbg_flag) { 307 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 308 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 309 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 310 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 311 } 312 313 if (pcbddc->dbg_flag) { 314 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 315 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 316 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 317 } 318 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 319 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 320 321 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 322 323 /* vertices */ 324 for (i=0;i<n_vertices;i++) { 325 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 326 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 327 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 328 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 329 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 330 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 331 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 332 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 333 if (n_constraints) { 334 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 335 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 336 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 337 } 338 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 339 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 340 341 /* Set values in coarse basis function and subdomain part of coarse_mat */ 342 /* coarse basis functions */ 343 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 344 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 345 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 346 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 347 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 348 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 349 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 350 if (pcbddc->switch_static || pcbddc->dbg_flag) { 351 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 352 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 353 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 354 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 355 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 356 } 357 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 358 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 359 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 360 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 361 if (n_constraints) { 362 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 363 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 365 } 366 367 /* check */ 368 if (pcbddc->dbg_flag) { 369 /* assemble subdomain vector on local nodes */ 370 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 371 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 372 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 373 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 374 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 376 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 377 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 378 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 379 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 380 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 384 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 385 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 386 } 387 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 389 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 390 /* check saddle point solution */ 391 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 392 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 393 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 394 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 395 /* shift by the identity matrix */ 396 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 397 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 398 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 399 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 400 } 401 } 402 403 /* constraints */ 404 for (i=0;i<n_constraints;i++) { 405 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 406 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 407 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 408 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 409 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 410 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 411 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 413 if (n_vertices) { 414 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 415 } 416 /* Set values in coarse basis function and subdomain part of coarse_mat */ 417 /* coarse basis functions */ 418 j = i+n_vertices; /* don't touch this! */ 419 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 420 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 421 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 423 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 425 if (pcbddc->switch_static || pcbddc->dbg_flag) { 426 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 427 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 429 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 431 } 432 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 433 if (n_vertices) { 434 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 435 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 437 } 438 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 439 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 441 442 if (pcbddc->dbg_flag) { 443 /* assemble subdomain vector on nodes */ 444 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 445 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 446 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 447 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 450 /* assemble subdomain vector of lagrange multipliers */ 451 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 452 if (n_vertices) { 453 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 454 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 455 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 456 } 457 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 458 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 459 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 460 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 462 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 463 /* check saddle point solution */ 464 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 465 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 466 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 468 /* shift by the identity matrix */ 469 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 470 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 471 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 472 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 473 } 474 } 475 /* call assembling routines for local coarse basis */ 476 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 477 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 478 if (pcbddc->switch_static || pcbddc->dbg_flag) { 479 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 480 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 481 } 482 483 /* compute other basis functions for non-symmetric problems */ 484 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 485 if (!setsym || (setsym && !issym)) { 486 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 487 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 489 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 490 if (pcbddc->switch_static || pcbddc->dbg_flag) { 491 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 492 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 493 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 494 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 495 } 496 for (i=0;i<pcbddc->local_primal_size;i++) { 497 if (n_constraints) { 498 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 499 for (j=0;j<n_constraints;j++) { 500 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 501 } 502 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 504 } 505 if (i<n_vertices) { 506 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 507 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 509 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 510 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 511 if (n_constraints) { 512 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 513 } 514 } else { 515 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 516 } 517 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 518 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 519 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 522 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 523 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 524 if (i<n_vertices) { 525 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 526 } 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 529 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 530 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 531 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 532 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 533 } 534 535 if (pcbddc->dbg_flag) { 536 /* assemble subdomain vector on nodes */ 537 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 538 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 539 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 540 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 541 if (i<n_vertices) { 542 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 545 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 546 /* assemble subdomain vector of lagrange multipliers */ 547 for (j=0;j<pcbddc->local_primal_size;j++) { 548 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 549 } 550 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 551 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 552 /* check saddle point solution */ 553 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 554 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 555 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 556 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 557 /* shift by the identity matrix */ 558 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 561 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 562 } 563 } 564 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 565 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 567 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 568 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 569 } 570 } 571 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 572 /* Checking coarse_sub_mat and coarse basis functios */ 573 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 574 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 575 if (pcbddc->dbg_flag) { 576 Mat coarse_sub_mat; 577 Mat AUXMAT,TM1,TM2,TM3,TM4; 578 Mat coarse_phi_D,coarse_phi_B; 579 Mat coarse_psi_D,coarse_psi_B; 580 Mat A_II,A_BB,A_IB,A_BI; 581 MatType checkmattype=MATSEQAIJ; 582 PetscReal real_value; 583 584 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 586 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 587 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 588 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 589 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 590 if (pcbddc->coarse_psi_B) { 591 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 592 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 593 } 594 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 595 596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 597 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 598 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 599 if (pcbddc->coarse_psi_B) { 600 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 601 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 603 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 604 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 605 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 606 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 607 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 608 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 609 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 610 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 611 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 612 } else { 613 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 614 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 615 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 616 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 617 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 618 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 619 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 620 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 621 } 622 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 626 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 627 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 632 for (i=0;i<pcbddc->local_primal_size;i++) { 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 634 } 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 636 for (i=0;i<pcbddc->local_primal_size;i++) { 637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 638 } 639 if (pcbddc->coarse_psi_B) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 641 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 643 } 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 645 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 647 } 648 } 649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 652 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 653 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 656 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 657 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 658 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 659 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 660 if (pcbddc->coarse_psi_B) { 661 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 662 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 663 } 664 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 665 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 666 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 667 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 668 } 669 /* free memory */ 670 if (n_vertices) { 671 ierr = PetscFree(vertices);CHKERRQ(ierr); 672 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 673 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 676 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 677 } 678 if (n_constraints) { 679 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 680 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 681 ierr = MatDestroy(&M1);CHKERRQ(ierr); 682 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 683 } 684 ierr = PetscFree(auxindices);CHKERRQ(ierr); 685 /* get back data */ 686 *coarse_submat_vals_n = coarse_submat_vals; 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 692 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 693 { 694 PC_IS* pcis = (PC_IS*)(pc->data); 695 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 696 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 697 PetscBool issbaij,isbaij; 698 /* manage repeated solves */ 699 MatReuse reuse; 700 MatStructure matstruct; 701 PetscErrorCode ierr; 702 703 PetscFunctionBegin; 704 /* get mat flags */ 705 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 706 reuse = MAT_INITIAL_MATRIX; 707 if (pc->setupcalled) { 708 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 709 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 710 if (matstruct == SAME_NONZERO_PATTERN) { 711 reuse = MAT_REUSE_MATRIX; 712 } else { 713 reuse = MAT_INITIAL_MATRIX; 714 } 715 } 716 if (reuse == MAT_INITIAL_MATRIX) { 717 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 722 } 723 724 /* transform local matrices if needed */ 725 if (pcbddc->use_change_of_basis) { 726 Mat change_mat_all; 727 PetscScalar *row_cmat_values; 728 PetscInt *row_cmat_indices; 729 PetscInt *nnz,*is_indices,*temp_indices; 730 PetscInt i,j,k,n_D,n_B; 731 732 /* Get Non-overlapping dimensions */ 733 n_B = pcis->n_B; 734 n_D = pcis->n-n_B; 735 736 /* compute nonzero structure of change of basis on all local nodes */ 737 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 738 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 740 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 742 k=1; 743 for (i=0;i<n_B;i++) { 744 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 745 nnz[is_indices[i]]=j; 746 if (k < j) k = j; 747 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 748 } 749 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 750 /* assemble change of basis matrix on the whole set of local dofs */ 751 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 752 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 753 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 754 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 755 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 756 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 757 for (i=0;i<n_D;i++) { 758 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 759 } 760 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 761 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 762 for (i=0;i<n_B;i++) { 763 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 764 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 765 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 766 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 767 } 768 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 769 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 770 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 771 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 772 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 773 if (!issbaij && !isbaij) { 774 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 775 } else { 776 Mat work_mat; 777 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 778 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 780 } 781 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 782 ierr = PetscFree(nnz);CHKERRQ(ierr); 783 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 784 } else { 785 /* without change of basis, the local matrix is unchanged */ 786 if (!pcbddc->local_mat) { 787 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 788 pcbddc->local_mat = matis->A; 789 } 790 } 791 792 /* get submatrices */ 793 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 795 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 796 if (!issbaij) { 797 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 798 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 799 } else { 800 Mat newmat; 801 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 802 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 803 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 804 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 805 } 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 811 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 812 { 813 PC_IS* pcis = (PC_IS*)(pc->data); 814 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 815 IS is_aux1,is_aux2; 816 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 817 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 818 PetscBT bitmask; 819 PetscErrorCode ierr; 820 821 PetscFunctionBegin; 822 /* Set Non-overlapping dimensions */ 823 n_B = pcis->n_B; n_D = pcis->n - n_B; 824 /* get vertex indices from constraint matrix */ 825 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 826 /* Set number of constraints */ 827 n_constraints = pcbddc->local_primal_size-n_vertices; 828 /* create auxiliary bitmask */ 829 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 830 for (i=0;i<n_vertices;i++) { 831 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 832 } 833 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 834 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 835 for (i=0, n_R=0; i<pcis->n; i++) { 836 if (!PetscBTLookup(bitmask,i)) { 837 idx_R_local[n_R] = i; 838 n_R++; 839 } 840 } 841 ierr = PetscFree(vertices);CHKERRQ(ierr); 842 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 843 844 /* print some info if requested */ 845 if (pcbddc->dbg_flag) { 846 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 847 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 848 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 851 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 853 } 854 855 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 856 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 857 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 858 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 859 for (i=0; i<n_D; i++) { 860 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 861 } 862 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 863 for (i=0, j=0; i<n_R; i++) { 864 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 865 aux_array1[j++] = i; 866 } 867 } 868 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 869 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 870 for (i=0, j=0; i<n_B; i++) { 871 if (!PetscBTLookup(bitmask,is_indices[i])) { 872 aux_array2[j++] = i; 873 } 874 } 875 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 876 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 877 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 878 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 879 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 880 881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 882 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 883 for (i=0, j=0; i<n_R; i++) { 884 if (PetscBTLookup(bitmask,idx_R_local[i])) { 885 aux_array1[j++] = i; 886 } 887 } 888 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 889 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 890 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 891 } 892 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 899 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 900 { 901 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 902 PC_IS *pcis = (PC_IS*)pc->data; 903 PC pc_temp; 904 Mat A_RR; 905 Vec vec1,vec2,vec3; 906 MatStructure matstruct; 907 PetscScalar m_one = -1.0; 908 PetscReal value; 909 PetscInt n_D,n_R,ibs,mbs; 910 PetscBool use_exact,use_exact_reduced,issbaij; 911 PetscErrorCode ierr; 912 /* prefixes stuff */ 913 char dir_prefix[256],neu_prefix[256],str_level[3]; 914 size_t len; 915 916 PetscFunctionBegin; 917 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 918 919 /* compute prefixes */ 920 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 921 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 922 if (!pcbddc->current_level) { 923 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 924 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 925 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 926 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 927 } else { 928 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 929 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 930 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 931 len -= 15; /* remove "pc_bddc_coarse_" */ 932 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 933 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 934 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 935 *(dir_prefix+len)='\0'; 936 *(neu_prefix+len)='\0'; 937 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 938 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 939 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 940 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 941 } 942 943 /* DIRICHLET PROBLEM */ 944 /* Matrix for Dirichlet problem is pcis->A_II */ 945 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 946 if (!pcbddc->ksp_D) { /* create object if not yet build */ 947 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 948 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 949 /* default */ 950 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 951 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 952 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 953 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 954 if (issbaij) { 955 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 956 } else { 957 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 958 } 959 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 960 } 961 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 962 /* Allow user's customization */ 963 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 964 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 965 if (!n_D) { 966 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 967 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 968 } 969 /* Set Up KSP for Dirichlet problem of BDDC */ 970 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 971 /* set ksp_D into pcis data */ 972 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 973 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 974 pcis->ksp_D = pcbddc->ksp_D; 975 976 /* NEUMANN PROBLEM */ 977 /* Matrix for Neumann problem is A_RR -> we need to create it */ 978 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 979 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 980 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 981 if (ibs != mbs) { /* TODO BAIJ: see if this can be avoided */ 982 Mat newmat; 983 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 984 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 985 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 986 } else { 987 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 988 } 989 if (!pcbddc->ksp_R) { /* create object if not yet build */ 990 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 991 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 992 /* default */ 993 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 994 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 995 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 996 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 997 if (issbaij) { 998 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 999 } else { 1000 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1001 } 1002 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1003 } 1004 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1005 /* Allow user's customization */ 1006 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1007 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1008 if (!n_R) { 1009 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1010 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1011 } 1012 /* Set Up KSP for Neumann problem of BDDC */ 1013 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1014 1015 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1016 1017 /* Dirichlet */ 1018 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1019 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1020 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1021 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1022 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1023 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1024 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1025 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1026 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1027 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1028 /* need to be adapted? */ 1029 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1030 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1031 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1032 /* print info */ 1033 if (pcbddc->dbg_flag) { 1034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1039 } 1040 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1041 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1042 } 1043 1044 /* Neumann */ 1045 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1046 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1047 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1048 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1049 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1050 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1051 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1052 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1053 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1054 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1055 /* need to be adapted? */ 1056 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1057 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1058 /* print info */ 1059 if (pcbddc->dbg_flag) { 1060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1061 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 } 1063 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1064 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1065 } 1066 /* free Neumann problem's matrix */ 1067 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1073 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1074 { 1075 PetscErrorCode ierr; 1076 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1077 1078 PetscFunctionBegin; 1079 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1080 if (pcbddc->local_auxmat1) { 1081 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1082 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1089 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1090 { 1091 PetscErrorCode ierr; 1092 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1093 PC_IS* pcis = (PC_IS*) (pc->data); 1094 const PetscScalar zero = 0.0; 1095 1096 PetscFunctionBegin; 1097 /* Application of PHI^T (or PSI^T) */ 1098 if (pcbddc->coarse_psi_B) { 1099 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1100 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1101 } else { 1102 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1103 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1104 } 1105 /* Scatter data of coarse_rhs */ 1106 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1107 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 1109 /* Local solution on R nodes */ 1110 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1111 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1112 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1113 if (pcbddc->switch_static) { 1114 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1115 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1116 } 1117 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1118 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1119 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121 if (pcbddc->switch_static) { 1122 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1123 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1124 } 1125 1126 /* Coarse solution */ 1127 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1129 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1130 } 1131 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1132 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 1134 /* Sum contributions from two levels */ 1135 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1136 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1142 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1143 { 1144 PetscErrorCode ierr; 1145 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1146 1147 PetscFunctionBegin; 1148 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1154 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1155 { 1156 PetscErrorCode ierr; 1157 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1158 1159 PetscFunctionBegin; 1160 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* uncomment for testing purposes */ 1165 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1168 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1169 { 1170 PetscErrorCode ierr; 1171 PC_IS* pcis = (PC_IS*)(pc->data); 1172 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1173 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1174 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1175 MatType impMatType=MATSEQAIJ; 1176 /* one and zero */ 1177 PetscScalar one=1.0,zero=0.0; 1178 /* space to store constraints and their local indices */ 1179 PetscScalar *temp_quadrature_constraint; 1180 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1181 /* iterators */ 1182 PetscInt i,j,k,total_counts,temp_start_ptr; 1183 /* stuff to store connected components stored in pcbddc->mat_graph */ 1184 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1185 PetscInt n_ISForFaces,n_ISForEdges; 1186 /* near null space stuff */ 1187 MatNullSpace nearnullsp; 1188 const Vec *nearnullvecs; 1189 Vec *localnearnullsp; 1190 PetscBool nnsp_has_cnst; 1191 PetscInt nnsp_size; 1192 PetscScalar *array; 1193 /* BLAS integers */ 1194 PetscBLASInt lwork,lierr; 1195 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1196 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1197 /* LAPACK working arrays for SVD or POD */ 1198 PetscBool skip_lapack; 1199 PetscScalar *work; 1200 PetscReal *singular_vals; 1201 #if defined(PETSC_USE_COMPLEX) 1202 PetscReal *rwork; 1203 #endif 1204 #if defined(PETSC_MISSING_LAPACK_GESVD) 1205 PetscBLASInt Blas_one_2=1; 1206 PetscScalar *temp_basis,*correlation_mat; 1207 #else 1208 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1209 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1210 #endif 1211 /* change of basis */ 1212 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1213 PetscBool boolforchange,qr_needed; 1214 PetscBT touched,change_basis,qr_needed_idx; 1215 /* auxiliary stuff */ 1216 PetscInt *nnz,*is_indices,*local_to_B; 1217 /* some quantities */ 1218 PetscInt n_vertices,total_primal_vertices; 1219 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1220 1221 1222 PetscFunctionBegin; 1223 /* Get index sets for faces, edges and vertices from graph */ 1224 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1225 pcbddc->use_vertices = PETSC_TRUE; 1226 } 1227 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1228 /* HACK: provide functions to set change of basis */ 1229 if (!ISForVertices && pcbddc->NullSpace) { 1230 pcbddc->use_change_of_basis = PETSC_TRUE; 1231 pcbddc->use_change_on_faces = PETSC_FALSE; 1232 } 1233 /* print some info */ 1234 if (pcbddc->dbg_flag) { 1235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1236 i = 0; 1237 if (ISForVertices) { 1238 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1239 } 1240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1241 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1244 } 1245 /* check if near null space is attached to global mat */ 1246 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1247 if (nearnullsp) { 1248 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1249 } else { /* if near null space is not provided BDDC uses constants by default */ 1250 nnsp_size = 0; 1251 nnsp_has_cnst = PETSC_TRUE; 1252 } 1253 /* get max number of constraints on a single cc */ 1254 max_constraints = nnsp_size; 1255 if (nnsp_has_cnst) max_constraints++; 1256 1257 /* 1258 Evaluate maximum storage size needed by the procedure 1259 - temp_indices will contain start index of each constraint stored as follows 1260 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1261 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1262 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1263 */ 1264 total_counts = n_ISForFaces+n_ISForEdges; 1265 total_counts *= max_constraints; 1266 n_vertices = 0; 1267 if (ISForVertices) { 1268 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1269 } 1270 total_counts += n_vertices; 1271 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1272 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1273 total_counts = 0; 1274 max_size_of_constraint = 0; 1275 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1276 if (i<n_ISForEdges) { 1277 used_IS = &ISForEdges[i]; 1278 } else { 1279 used_IS = &ISForFaces[i-n_ISForEdges]; 1280 } 1281 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1282 total_counts += j; 1283 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1284 } 1285 total_counts *= max_constraints; 1286 total_counts += n_vertices; 1287 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1288 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1289 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1290 /* local to boundary numbering */ 1291 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1292 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1293 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1294 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1295 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1296 /* get local part of global near null space vectors */ 1297 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1298 for (k=0;k<nnsp_size;k++) { 1299 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1300 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1301 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 } 1303 1304 /* whether or not to skip lapack calls */ 1305 skip_lapack = PETSC_TRUE; 1306 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1307 1308 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1309 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1310 PetscScalar temp_work; 1311 #if defined(PETSC_MISSING_LAPACK_GESVD) 1312 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1313 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1314 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1315 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1316 #if defined(PETSC_USE_COMPLEX) 1317 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1318 #endif 1319 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1320 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1321 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1322 lwork = -1; 1323 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1324 #if !defined(PETSC_USE_COMPLEX) 1325 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1326 #else 1327 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1328 #endif 1329 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1330 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1331 #else /* on missing GESVD */ 1332 /* SVD */ 1333 PetscInt max_n,min_n; 1334 max_n = max_size_of_constraint; 1335 min_n = max_constraints; 1336 if (max_size_of_constraint < max_constraints) { 1337 min_n = max_size_of_constraint; 1338 max_n = max_constraints; 1339 } 1340 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1341 #if defined(PETSC_USE_COMPLEX) 1342 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1343 #endif 1344 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1345 lwork = -1; 1346 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1347 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1348 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1350 #if !defined(PETSC_USE_COMPLEX) 1351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1352 #else 1353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1354 #endif 1355 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1357 #endif /* on missing GESVD */ 1358 /* Allocate optimal workspace */ 1359 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1360 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1361 } 1362 /* Now we can loop on constraining sets */ 1363 total_counts = 0; 1364 temp_indices[0] = 0; 1365 /* vertices */ 1366 if (ISForVertices) { 1367 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1368 if (nnsp_has_cnst) { /* consider all vertices */ 1369 for (i=0;i<n_vertices;i++) { 1370 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1371 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1372 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1373 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1374 total_counts++; 1375 } 1376 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1377 PetscBool used_vertex; 1378 for (i=0;i<n_vertices;i++) { 1379 used_vertex = PETSC_FALSE; 1380 k = 0; 1381 while (!used_vertex && k<nnsp_size) { 1382 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1383 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1384 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1385 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1386 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1387 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1388 total_counts++; 1389 used_vertex = PETSC_TRUE; 1390 } 1391 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1392 k++; 1393 } 1394 } 1395 } 1396 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1397 n_vertices = total_counts; 1398 } 1399 1400 /* edges and faces */ 1401 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1402 if (i<n_ISForEdges) { 1403 used_IS = &ISForEdges[i]; 1404 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1405 } else { 1406 used_IS = &ISForFaces[i-n_ISForEdges]; 1407 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1408 } 1409 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1410 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1411 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1412 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1413 /* change of basis should not be performed on local periodic nodes */ 1414 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1415 if (nnsp_has_cnst) { 1416 PetscScalar quad_value; 1417 temp_constraints++; 1418 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1419 for (j=0;j<size_of_constraint;j++) { 1420 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1421 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1422 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1423 } 1424 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1425 if (boolforchange) { 1426 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1427 } 1428 total_counts++; 1429 } 1430 for (k=0;k<nnsp_size;k++) { 1431 PetscReal real_value; 1432 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1433 for (j=0;j<size_of_constraint;j++) { 1434 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1435 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1436 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1437 } 1438 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1439 /* check if array is null on the connected component */ 1440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1441 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1442 if (real_value > 0.0) { /* keep indices and values */ 1443 temp_constraints++; 1444 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1445 if (boolforchange) { 1446 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1447 } 1448 total_counts++; 1449 } 1450 } 1451 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1452 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1453 if (!pcbddc->use_nnsp_true && temp_constraints) { 1454 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1455 1456 #if defined(PETSC_MISSING_LAPACK_GESVD) 1457 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1458 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1459 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1460 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1461 from that computed using LAPACKgesvd 1462 -> This is due to a different computation of eigenvectors in LAPACKheev 1463 -> The quality of the POD-computed basis will be the same */ 1464 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1465 /* Store upper triangular part of correlation matrix */ 1466 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1467 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1468 for (j=0;j<temp_constraints;j++) { 1469 for (k=0;k<j+1;k++) { 1470 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1471 } 1472 } 1473 /* compute eigenvalues and eigenvectors of correlation matrix */ 1474 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1475 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1476 #if !defined(PETSC_USE_COMPLEX) 1477 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1478 #else 1479 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1480 #endif 1481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1483 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1484 j=0; 1485 while (j < temp_constraints && singular_vals[j] < tol) j++; 1486 total_counts=total_counts-j; 1487 /* scale and copy POD basis into used quadrature memory */ 1488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1489 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1490 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1492 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1494 if (j<temp_constraints) { 1495 PetscInt ii; 1496 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1497 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1498 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1499 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1500 for (k=0;k<temp_constraints-j;k++) { 1501 for (ii=0;ii<size_of_constraint;ii++) { 1502 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1503 } 1504 } 1505 } 1506 #else /* on missing GESVD */ 1507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1508 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1510 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1511 #if !defined(PETSC_USE_COMPLEX) 1512 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1513 #else 1514 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1515 #endif 1516 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1517 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1518 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1519 k = temp_constraints; 1520 if (k > size_of_constraint) k = size_of_constraint; 1521 j = 0; 1522 while (j < k && singular_vals[k-j-1] < tol) j++; 1523 total_counts = total_counts-temp_constraints+k-j; 1524 #endif /* on missing GESVD */ 1525 } 1526 } 1527 /* free index sets of faces, edges and vertices */ 1528 for (i=0;i<n_ISForFaces;i++) { 1529 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1530 } 1531 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1532 for (i=0;i<n_ISForEdges;i++) { 1533 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1534 } 1535 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1536 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1537 1538 /* free workspace */ 1539 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1540 ierr = PetscFree(work);CHKERRQ(ierr); 1541 #if defined(PETSC_USE_COMPLEX) 1542 ierr = PetscFree(rwork);CHKERRQ(ierr); 1543 #endif 1544 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1545 #if defined(PETSC_MISSING_LAPACK_GESVD) 1546 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1547 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1548 #endif 1549 } 1550 for (k=0;k<nnsp_size;k++) { 1551 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1552 } 1553 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1554 1555 /* set quantities in pcbddc data structure */ 1556 /* n_vertices defines the number of subdomain corners in the primal space */ 1557 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1558 pcbddc->local_primal_size = total_counts; 1559 pcbddc->n_vertices = n_vertices; 1560 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1561 1562 /* Create constraint matrix */ 1563 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1564 /* so we need to set it up properly either with or without change of basis */ 1565 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1566 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1567 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1568 /* array to compute a local numbering of constraints : vertices first then constraints */ 1569 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1570 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1571 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1572 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1573 /* auxiliary stuff for basis change */ 1574 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1575 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1576 1577 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1578 total_primal_vertices=0; 1579 for (i=0;i<pcbddc->local_primal_size;i++) { 1580 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1581 if (size_of_constraint == 1) { 1582 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1583 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1584 aux_primal_minloc[total_primal_vertices]=0; 1585 total_primal_vertices++; 1586 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1587 PetscInt min_loc,min_index; 1588 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1589 /* find first untouched local node */ 1590 k = 0; 1591 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1592 min_index = global_indices[k]; 1593 min_loc = k; 1594 /* search the minimum among global nodes already untouched on the cc */ 1595 for (k=1;k<size_of_constraint;k++) { 1596 /* there can be more than one constraint on a single connected component */ 1597 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1598 min_index = global_indices[k]; 1599 min_loc = k; 1600 } 1601 } 1602 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1603 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1604 aux_primal_minloc[total_primal_vertices]=min_loc; 1605 total_primal_vertices++; 1606 } 1607 } 1608 /* determine if a QR strategy is needed for change of basis */ 1609 qr_needed = PETSC_FALSE; 1610 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1611 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1612 if (PetscBTLookup(change_basis,i)) { 1613 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1614 j = 0; 1615 for (k=0;k<size_of_constraint;k++) { 1616 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1617 j++; 1618 } 1619 } 1620 /* found more than one primal dof on the cc */ 1621 if (j > 1) { 1622 PetscBTSet(qr_needed_idx,i); 1623 qr_needed = PETSC_TRUE; 1624 } 1625 } 1626 } 1627 /* free workspace */ 1628 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1629 /* permute indices in order to have a sorted set of vertices */ 1630 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1631 1632 /* nonzero structure of constraint matrix */ 1633 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1634 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1635 j=total_primal_vertices; 1636 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1637 if (!PetscBTLookup(change_basis,i)) { 1638 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1639 j++; 1640 } 1641 } 1642 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1643 ierr = PetscFree(nnz);CHKERRQ(ierr); 1644 /* set values in constraint matrix */ 1645 for (i=0;i<total_primal_vertices;i++) { 1646 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1647 } 1648 total_counts = total_primal_vertices; 1649 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1650 if (!PetscBTLookup(change_basis,i)) { 1651 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1652 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1653 total_counts++; 1654 } 1655 } 1656 /* assembling */ 1657 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 /* 1660 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1661 */ 1662 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1663 if (pcbddc->use_change_of_basis) { 1664 /* dual and primal dofs on a single cc */ 1665 PetscInt dual_dofs,primal_dofs; 1666 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1667 PetscInt primal_counter; 1668 /* working stuff for GEQRF */ 1669 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1670 PetscBLASInt lqr_work; 1671 /* working stuff for UNGQR */ 1672 PetscScalar *gqr_work,lgqr_work_t; 1673 PetscBLASInt lgqr_work; 1674 /* working stuff for TRTRS */ 1675 PetscScalar *trs_rhs; 1676 PetscBLASInt Blas_NRHS; 1677 /* pointers for values insertion into change of basis matrix */ 1678 PetscInt *start_rows,*start_cols; 1679 PetscScalar *start_vals; 1680 /* working stuff for values insertion */ 1681 PetscBT is_primal; 1682 1683 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1684 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1685 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1686 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1687 /* work arrays */ 1688 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1689 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1690 /* nonzeros per row */ 1691 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1692 if (PetscBTLookup(change_basis,i)) { 1693 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1694 if (PetscBTLookup(qr_needed_idx,i)) { 1695 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1696 } else { 1697 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1698 /* get local primal index on the cc */ 1699 j = 0; 1700 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1701 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1702 } 1703 } 1704 } 1705 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1706 ierr = PetscFree(nnz);CHKERRQ(ierr); 1707 /* Set initial identity in the matrix */ 1708 for (i=0;i<pcis->n_B;i++) { 1709 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1710 } 1711 1712 if (pcbddc->dbg_flag) { 1713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1715 } 1716 1717 1718 /* Now we loop on the constraints which need a change of basis */ 1719 /* 1720 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1721 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1722 1723 Basic blocks of change of basis matrix T computed 1724 1725 - Using the following block transformation if there is only a primal dof on the cc 1726 (in the example, primal dof is the last one of the edge in LOCAL ordering 1727 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1728 | 1 0 ... 0 1 | 1729 | 0 1 ... 0 1 | 1730 | ... | 1731 | 0 ... 1 1 | 1732 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1733 1734 - via QR decomposition of constraints otherwise 1735 */ 1736 if (qr_needed) { 1737 /* space to store Q */ 1738 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1739 /* first we issue queries for optimal work */ 1740 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1741 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1742 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1743 lqr_work = -1; 1744 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1745 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1746 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1747 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1748 lgqr_work = -1; 1749 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1750 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1751 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1752 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1753 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1754 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1755 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1756 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1757 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1758 /* array to store scaling factors for reflectors */ 1759 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1760 /* array to store rhs and solution of triangular solver */ 1761 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1762 /* allocating workspace for check */ 1763 if (pcbddc->dbg_flag) { 1764 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1765 } 1766 } 1767 /* array to store whether a node is primal or not */ 1768 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1769 for (i=0;i<total_primal_vertices;i++) { 1770 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1771 } 1772 1773 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1774 /* -> using implicit ordering contained in temp_indices data */ 1775 total_counts = pcbddc->n_vertices; 1776 primal_counter = total_counts; 1777 while (total_counts<pcbddc->local_primal_size) { 1778 primal_dofs = 1; 1779 if (PetscBTLookup(change_basis,total_counts)) { 1780 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1781 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1782 primal_dofs++; 1783 } 1784 /* get constraint info */ 1785 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1786 dual_dofs = size_of_constraint-primal_dofs; 1787 1788 if (pcbddc->dbg_flag) { 1789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1790 } 1791 1792 if (primal_dofs > 1) { /* QR */ 1793 1794 /* copy quadrature constraints for change of basis check */ 1795 if (pcbddc->dbg_flag) { 1796 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1797 } 1798 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1799 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1800 1801 /* compute QR decomposition of constraints */ 1802 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1803 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1805 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1806 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1807 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1808 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1809 1810 /* explictly compute R^-T */ 1811 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1812 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1813 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1814 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1816 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1818 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1819 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1820 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1821 1822 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1823 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1825 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1828 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1829 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1830 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1831 1832 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1833 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1834 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1835 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1836 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1837 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1838 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1839 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1841 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1842 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1843 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1844 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1845 1846 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1847 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1848 /* insert cols for primal dofs */ 1849 for (j=0;j<primal_dofs;j++) { 1850 start_vals = &qr_basis[j*size_of_constraint]; 1851 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1852 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1853 } 1854 /* insert cols for dual dofs */ 1855 for (j=0,k=0;j<dual_dofs;k++) { 1856 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1857 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1858 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1859 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1860 j++; 1861 } 1862 } 1863 1864 /* check change of basis */ 1865 if (pcbddc->dbg_flag) { 1866 PetscInt ii,jj; 1867 PetscBool valid_qr=PETSC_TRUE; 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1872 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1873 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1874 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1875 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1876 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1877 for (jj=0;jj<size_of_constraint;jj++) { 1878 for (ii=0;ii<primal_dofs;ii++) { 1879 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1880 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1881 } 1882 } 1883 if (!valid_qr) { 1884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1885 for (jj=0;jj<size_of_constraint;jj++) { 1886 for (ii=0;ii<primal_dofs;ii++) { 1887 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1888 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1889 } 1890 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1891 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1892 } 1893 } 1894 } 1895 } else { 1896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1897 } 1898 } 1899 } else { /* simple transformation block */ 1900 PetscInt row,col; 1901 PetscScalar val; 1902 for (j=0;j<size_of_constraint;j++) { 1903 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1904 if (!PetscBTLookup(is_primal,row)) { 1905 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1906 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1907 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1908 } else { 1909 for (k=0;k<size_of_constraint;k++) { 1910 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1911 if (row != col) { 1912 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1913 } else { 1914 val = 1.0; 1915 } 1916 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1917 } 1918 } 1919 } 1920 } 1921 /* increment primal counter */ 1922 primal_counter += primal_dofs; 1923 } else { 1924 if (pcbddc->dbg_flag) { 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1926 } 1927 } 1928 /* increment constraint counter total_counts */ 1929 total_counts += primal_dofs; 1930 } 1931 1932 /* free workspace */ 1933 if (qr_needed) { 1934 if (pcbddc->dbg_flag) { 1935 ierr = PetscFree(work);CHKERRQ(ierr); 1936 } 1937 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1938 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1939 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1940 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1941 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1942 } 1943 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1944 /* assembling */ 1945 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1947 /* 1948 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1949 */ 1950 } 1951 1952 /* flush dbg viewer */ 1953 if (pcbddc->dbg_flag) { 1954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1955 } 1956 1957 /* free workspace */ 1958 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1959 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1960 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1961 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1962 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1963 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1964 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1965 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1966 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1967 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1973 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1974 { 1975 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1976 PC_IS *pcis = (PC_IS*)pc->data; 1977 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1978 PetscInt bs,ierr,i,vertex_size; 1979 PetscViewer viewer=pcbddc->dbg_viewer; 1980 1981 PetscFunctionBegin; 1982 /* Init local Graph struct */ 1983 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1984 1985 /* Check validity of the csr graph passed in by the user */ 1986 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1987 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1988 } 1989 1990 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1991 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1992 Mat mat_adj; 1993 const PetscInt *xadj,*adjncy; 1994 PetscBool flg_row=PETSC_TRUE; 1995 1996 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1997 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1998 if (!flg_row) { 1999 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2000 } 2001 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2002 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2003 if (!flg_row) { 2004 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2005 } 2006 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2007 } 2008 2009 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2010 vertex_size = 1; 2011 if (!pcbddc->user_provided_isfordofs) { 2012 if (!pcbddc->n_ISForDofs) { 2013 IS *custom_ISForDofs; 2014 2015 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2016 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2017 for (i=0;i<bs;i++) { 2018 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2019 } 2020 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2021 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2022 /* remove my references to IS objects */ 2023 for (i=0;i<bs;i++) { 2024 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2025 } 2026 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2027 } 2028 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2029 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2030 } 2031 2032 /* Setup of Graph */ 2033 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2034 2035 /* Graph's connected components analysis */ 2036 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2037 2038 /* print some info to stdout */ 2039 if (pcbddc->dbg_flag) { 2040 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2047 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2048 { 2049 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2050 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2051 PetscErrorCode ierr; 2052 2053 PetscFunctionBegin; 2054 n = 0; 2055 vertices = 0; 2056 if (pcbddc->ConstraintMatrix) { 2057 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2058 for (i=0;i<local_primal_size;i++) { 2059 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2060 if (size_of_constraint == 1) n++; 2061 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2062 } 2063 if (vertices_idx) { 2064 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2065 n = 0; 2066 for (i=0;i<local_primal_size;i++) { 2067 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2068 if (size_of_constraint == 1) { 2069 vertices[n++]=row_cmat_indices[0]; 2070 } 2071 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2072 } 2073 } 2074 } 2075 *n_vertices = n; 2076 if (vertices_idx) *vertices_idx = vertices; 2077 PetscFunctionReturn(0); 2078 } 2079 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2082 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2083 { 2084 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2085 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2086 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2087 PetscBT touched; 2088 PetscErrorCode ierr; 2089 2090 /* This function assumes that the number of local constraints per connected component 2091 is not greater than the number of nodes defined for the connected component 2092 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2093 PetscFunctionBegin; 2094 n = 0; 2095 constraints_index = 0; 2096 if (pcbddc->ConstraintMatrix) { 2097 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2098 max_size_of_constraint = 0; 2099 for (i=0;i<local_primal_size;i++) { 2100 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2101 if (size_of_constraint > 1) { 2102 n++; 2103 } 2104 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2105 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2106 } 2107 if (constraints_idx) { 2108 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2109 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2110 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2111 n = 0; 2112 for (i=0;i<local_primal_size;i++) { 2113 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2114 if (size_of_constraint > 1) { 2115 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2116 /* find first untouched local node */ 2117 j = 0; 2118 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2119 min_index = row_cmat_global_indices[j]; 2120 min_loc = j; 2121 /* search the minimum among nodes not yet touched on the connected component 2122 since there can be more than one constraint on a single cc */ 2123 for (j=1;j<size_of_constraint;j++) { 2124 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2125 min_index = row_cmat_global_indices[j]; 2126 min_loc = j; 2127 } 2128 } 2129 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2130 constraints_index[n++] = row_cmat_indices[min_loc]; 2131 } 2132 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2133 } 2134 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2135 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2136 } 2137 } 2138 *n_constraints = n; 2139 if (constraints_idx) *constraints_idx = constraints_index; 2140 PetscFunctionReturn(0); 2141 } 2142 2143 /* the next two functions has been adapted from pcis.c */ 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "PCBDDCApplySchur" 2146 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2147 { 2148 PetscErrorCode ierr; 2149 PC_IS *pcis = (PC_IS*)(pc->data); 2150 2151 PetscFunctionBegin; 2152 if (!vec2_B) { vec2_B = v; } 2153 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2154 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2155 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2156 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2157 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2163 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2164 { 2165 PetscErrorCode ierr; 2166 PC_IS *pcis = (PC_IS*)(pc->data); 2167 2168 PetscFunctionBegin; 2169 if (!vec2_B) { vec2_B = v; } 2170 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2171 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2172 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2173 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2174 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "PCBDDCSubsetNumbering" 2180 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2181 { 2182 Vec local_vec,global_vec; 2183 IS seqis,paris; 2184 VecScatter scatter_ctx; 2185 PetscScalar *array; 2186 PetscInt *temp_global_dofs; 2187 PetscScalar globalsum; 2188 PetscInt i,j,s; 2189 PetscInt nlocals,first_index,old_index,max_local; 2190 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2191 PetscMPIInt *dof_sizes,*dof_displs; 2192 PetscBool first_found; 2193 PetscErrorCode ierr; 2194 2195 PetscFunctionBegin; 2196 /* mpi buffers */ 2197 MPI_Comm_size(comm,&size_prec_comm); 2198 MPI_Comm_rank(comm,&rank_prec_comm); 2199 j = ( !rank_prec_comm ? size_prec_comm : 0); 2200 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2201 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2202 /* get maximum size of subset */ 2203 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2204 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2205 max_local = 0; 2206 if (n_local_dofs) { 2207 max_local = temp_global_dofs[0]; 2208 for (i=1;i<n_local_dofs;i++) { 2209 if (max_local < temp_global_dofs[i] ) { 2210 max_local = temp_global_dofs[i]; 2211 } 2212 } 2213 } 2214 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2215 max_global++; 2216 max_local = 0; 2217 if (n_local_dofs) { 2218 max_local = local_dofs[0]; 2219 for (i=1;i<n_local_dofs;i++) { 2220 if (max_local < local_dofs[i] ) { 2221 max_local = local_dofs[i]; 2222 } 2223 } 2224 } 2225 max_local++; 2226 /* allocate workspace */ 2227 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2228 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2229 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2230 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2231 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2232 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2233 /* create scatter */ 2234 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2236 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2237 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2238 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2239 /* init array */ 2240 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2241 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2242 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2243 if (local_dofs_mult) { 2244 for (i=0;i<n_local_dofs;i++) { 2245 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2246 } 2247 } else { 2248 for (i=0;i<n_local_dofs;i++) { 2249 array[local_dofs[i]]=1.0; 2250 } 2251 } 2252 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2253 /* scatter into global vec and get total number of global dofs */ 2254 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2255 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2256 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2257 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2258 /* Fill global_vec with cumulative function for global numbering */ 2259 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2260 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2261 nlocals = 0; 2262 first_index = -1; 2263 first_found = PETSC_FALSE; 2264 for (i=0;i<s;i++) { 2265 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2266 first_found = PETSC_TRUE; 2267 first_index = i; 2268 } 2269 nlocals += (PetscInt)PetscRealPart(array[i]); 2270 } 2271 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2272 if (!rank_prec_comm) { 2273 dof_displs[0]=0; 2274 for (i=1;i<size_prec_comm;i++) { 2275 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2276 } 2277 } 2278 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2279 if (first_found) { 2280 array[first_index] += (PetscScalar)nlocals; 2281 old_index = first_index; 2282 for (i=first_index+1;i<s;i++) { 2283 if (PetscRealPart(array[i]) > 0.0) { 2284 array[i] += array[old_index]; 2285 old_index = i; 2286 } 2287 } 2288 } 2289 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2290 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2291 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2292 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2293 /* get global ordering of local dofs */ 2294 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2295 if (local_dofs_mult) { 2296 for (i=0;i<n_local_dofs;i++) { 2297 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2298 } 2299 } else { 2300 for (i=0;i<n_local_dofs;i++) { 2301 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2302 } 2303 } 2304 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2305 /* free workspace */ 2306 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2307 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2308 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2309 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2310 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2311 /* return pointer to global ordering of local dofs */ 2312 *global_numbering_subset = temp_global_dofs; 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2318 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2319 { 2320 PetscInt i,j; 2321 PetscScalar *alphas; 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 /* this implements stabilized Gram-Schmidt */ 2326 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2327 for (i=0;i<n;i++) { 2328 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2329 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2330 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2331 } 2332 ierr = PetscFree(alphas);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatConvert_IS_AIJ" 2339 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2340 { 2341 Mat new_mat; 2342 Mat_IS *matis = (Mat_IS*)(mat->data); 2343 /* info on mat */ 2344 PetscInt bs,rows,cols; 2345 PetscInt lrows,lcols; 2346 PetscInt local_rows,local_cols; 2347 PetscMPIInt nsubdomains; 2348 /* preallocation */ 2349 Vec vec_dnz,vec_onz; 2350 PetscScalar *my_dnz,*my_onz,*array; 2351 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2352 PetscInt index_row,index_col,owner; 2353 PetscInt *local_indices,*global_indices; 2354 /* work */ 2355 PetscInt i,j; 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 /* CHECKS */ 2360 /* get info from mat */ 2361 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2362 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2363 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2364 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2365 2366 /* MAT_INITIAL_MATRIX */ 2367 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2368 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2369 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2370 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2371 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2372 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2373 2374 /* 2375 preallocation 2376 */ 2377 2378 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2379 /* 2380 Some vectors are needed to sum up properly on shared interface dofs. 2381 Note that preallocation is not exact, since it overestimates nonzeros 2382 */ 2383 /* 2384 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2385 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2386 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2387 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2388 */ 2389 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2390 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2391 /* All processes need to compute entire row ownership */ 2392 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2393 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2394 for (i=0;i<nsubdomains;i++) { 2395 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2396 row_ownership[j]=i; 2397 } 2398 } 2399 /* map indices of local mat to global values */ 2400 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2401 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2402 for (i=0;i<local_rows;i++) local_indices[i]=i; 2403 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2404 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2405 2406 /* 2407 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2408 then, they will be summed up properly. This way, preallocation is always sufficient 2409 */ 2410 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2411 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2412 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2413 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2414 for (i=0;i<local_rows;i++) { 2415 index_row = global_indices[i]; 2416 for (j=i;j<local_rows;j++) { 2417 owner = row_ownership[index_row]; 2418 index_col = global_indices[j]; 2419 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2420 my_dnz[i] += 1.0; 2421 } else { /* offdiag block */ 2422 my_onz[i] += 1.0; 2423 } 2424 /* same as before, interchanging rows and cols */ 2425 if (i != j) { 2426 owner = row_ownership[index_col]; 2427 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2428 my_dnz[j] += 1.0; 2429 } else { 2430 my_onz[j] += 1.0; 2431 } 2432 } 2433 } 2434 } 2435 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2436 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2437 if (local_rows) { /* multilevel guard */ 2438 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2439 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2440 } 2441 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2442 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2443 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2444 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2445 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2446 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2447 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2448 2449 /* set computed preallocation in dnz and onz */ 2450 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2451 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2452 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2453 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2454 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2455 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2456 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2457 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2458 2459 /* Resize preallocation if overestimated */ 2460 for (i=0;i<lrows;i++) { 2461 dnz[i] = PetscMin(dnz[i],lcols); 2462 onz[i] = PetscMin(onz[i],cols-lcols); 2463 } 2464 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2465 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2466 2467 /* 2468 Set values. Very Basic. 2469 */ 2470 for (i=0;i<local_rows;i++) { 2471 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2472 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2473 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2474 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2475 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2476 } 2477 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2479 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2480 2481 /* get back mat */ 2482 *M = new_mat; 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatISSubassemble_Private" 2488 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2489 { 2490 Mat subdomain_adj; 2491 IS new_ranks,ranks_send_to; 2492 MatPartitioning partitioner; 2493 Mat_IS *matis; 2494 PetscInt n_neighs,*neighs,*n_shared,**shared; 2495 PetscInt prank; 2496 PetscMPIInt size,rank,color; 2497 PetscInt *xadj,*adjncy,*oldranks; 2498 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2499 PetscInt i,j,n_subdomains,local_size,threshold=0; 2500 PetscErrorCode ierr; 2501 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2502 PetscSubcomm subcomm; 2503 2504 PetscFunctionBegin; 2505 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2506 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2507 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2508 2509 /* Get info on mapping */ 2510 matis = (Mat_IS*)(mat->data); 2511 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2512 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2513 2514 /* build local CSR graph of subdomains' connectivity */ 2515 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2516 xadj[0] = 0; 2517 xadj[1] = PetscMax(n_neighs-1,0); 2518 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2519 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2520 2521 if (threshold) { 2522 PetscInt* count,min_threshold; 2523 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2524 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2525 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2526 for (j=0;j<n_shared[i];j++) { 2527 count[shared[i][j]] += 1; 2528 } 2529 } 2530 /* adapt threshold since we dont want to lose significant connections */ 2531 min_threshold = n_neighs; 2532 for (i=1;i<n_neighs;i++) { 2533 for (j=0;j<n_shared[i];j++) { 2534 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2535 } 2536 } 2537 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2538 threshold = PetscMax(min_threshold+1,threshold); 2539 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2540 xadj[1] = 0; 2541 for (i=1;i<n_neighs;i++) { 2542 for (j=0;j<n_shared[i];j++) { 2543 if (count[shared[i][j]] < threshold) { 2544 adjncy[xadj[1]] = neighs[i]; 2545 adjncy_wgt[xadj[1]] = n_shared[i]; 2546 xadj[1]++; 2547 break; 2548 } 2549 } 2550 } 2551 ierr = PetscFree(count);CHKERRQ(ierr); 2552 } else { 2553 if (xadj[1]) { 2554 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2555 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2556 } 2557 } 2558 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2559 if (use_square) { 2560 for (i=0;i<xadj[1];i++) { 2561 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2562 } 2563 } 2564 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2565 2566 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2567 2568 /* 2569 Restrict work on active processes only. 2570 */ 2571 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2572 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2573 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2574 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2575 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2576 if (color) { 2577 ierr = PetscFree(xadj);CHKERRQ(ierr); 2578 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2579 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2580 } else { 2581 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2582 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2583 prank = rank; 2584 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2585 for (i=0;i<size;i++) { 2586 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2587 } 2588 for (i=0;i<xadj[1];i++) { 2589 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2590 } 2591 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2592 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2593 n_subdomains = (PetscInt)size; 2594 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2595 2596 /* Partition */ 2597 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2598 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2599 if (use_vwgt) { 2600 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2601 v_wgt[0] = local_size; 2602 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2603 } 2604 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2605 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2606 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2607 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2608 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2609 2610 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2611 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2612 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2613 /* clean up */ 2614 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2615 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2616 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2617 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2618 } 2619 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2620 2621 /* assemble parallel IS for sends */ 2622 i = 1; 2623 if (color) i=0; 2624 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2625 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2626 2627 /* get back IS */ 2628 *is_sends = ranks_send_to; 2629 PetscFunctionReturn(0); 2630 } 2631 2632 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatISSubassemble" 2636 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2637 { 2638 Mat local_mat,new_mat; 2639 Mat_IS *matis; 2640 IS is_sends_internal; 2641 PetscInt rows,cols; 2642 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2643 PetscBool ismatis,isdense; 2644 ISLocalToGlobalMapping l2gmap; 2645 PetscInt* l2gmap_indices; 2646 const PetscInt* is_indices; 2647 MatType new_local_type; 2648 MatTypePrivate new_local_type_private; 2649 /* buffers */ 2650 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2651 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2652 /* MPI */ 2653 MPI_Comm comm; 2654 PetscMPIInt n_sends,n_recvs,commsize; 2655 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2656 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2657 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2658 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2659 PetscErrorCode ierr; 2660 2661 PetscFunctionBegin; 2662 /* checks */ 2663 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2664 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2665 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2666 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2667 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2668 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2669 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2670 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2671 PetscValidLogicalCollectiveInt(mat,bs,0); 2672 /* prepare IS for sending if not provided */ 2673 if (!is_sends) { 2674 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2675 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2676 } else { 2677 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2678 is_sends_internal = is_sends; 2679 } 2680 2681 /* get pointer of MATIS data */ 2682 matis = (Mat_IS*)mat->data; 2683 2684 /* get comm */ 2685 comm = PetscObjectComm((PetscObject)mat); 2686 2687 /* compute number of sends */ 2688 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2689 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2690 2691 /* compute number of receives */ 2692 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2693 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2694 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2695 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2696 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2697 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2698 ierr = PetscFree(iflags);CHKERRQ(ierr); 2699 2700 /* prepare send/receive buffers */ 2701 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2702 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2703 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2704 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2705 2706 /* Get data from local mat */ 2707 if (!isdense) { 2708 /* TODO: See below some guidelines on how to prepare the local buffers */ 2709 /* 2710 send_buffer_vals should contain the raw values of the local matrix 2711 send_buffer_idxs should contain: 2712 - MatType_PRIVATE type 2713 - PetscInt size_of_l2gmap 2714 - PetscInt global_row_indices[size_of_l2gmap] 2715 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2716 */ 2717 } else { 2718 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2719 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2720 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2721 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2722 send_buffer_idxs[1] = i; 2723 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2724 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2725 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2726 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2727 for (i=0;i<n_sends;i++) { 2728 ilengths_vals[is_indices[i]] = len*len; 2729 ilengths_idxs[is_indices[i]] = len+2; 2730 } 2731 } 2732 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2733 buf_size_idxs = 0; 2734 buf_size_vals = 0; 2735 for (i=0;i<n_recvs;i++) { 2736 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2737 buf_size_vals += (PetscInt)olengths_vals[i]; 2738 } 2739 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2740 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2741 2742 /* get new tags for clean communications */ 2743 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2744 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2745 2746 /* allocate for requests */ 2747 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2748 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2749 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2750 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2751 2752 /* communications */ 2753 ptr_idxs = recv_buffer_idxs; 2754 ptr_vals = recv_buffer_vals; 2755 for (i=0;i<n_recvs;i++) { 2756 source_dest = onodes[i]; 2757 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2758 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2759 ptr_idxs += olengths_idxs[i]; 2760 ptr_vals += olengths_vals[i]; 2761 } 2762 for (i=0;i<n_sends;i++) { 2763 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2764 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2765 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2766 } 2767 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2768 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2769 2770 /* assemble new l2g map */ 2771 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2772 ptr_idxs = recv_buffer_idxs; 2773 buf_size_idxs = 0; 2774 for (i=0;i<n_recvs;i++) { 2775 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2776 ptr_idxs += olengths_idxs[i]; 2777 } 2778 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2779 ptr_idxs = recv_buffer_idxs; 2780 buf_size_idxs = 0; 2781 for (i=0;i<n_recvs;i++) { 2782 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2783 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2784 ptr_idxs += olengths_idxs[i]; 2785 } 2786 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2787 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2788 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2789 2790 /* infer new local matrix type from received local matrices type */ 2791 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2792 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2793 new_local_type_private = MATAIJ_PRIVATE; 2794 new_local_type = MATSEQAIJ; 2795 if (n_recvs) { 2796 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2797 ptr_idxs = recv_buffer_idxs; 2798 for (i=0;i<n_recvs;i++) { 2799 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2800 new_local_type_private = MATAIJ_PRIVATE; 2801 break; 2802 } 2803 ptr_idxs += olengths_idxs[i]; 2804 } 2805 switch (new_local_type_private) { 2806 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2807 new_local_type = MATSEQAIJ; 2808 bs = 1; 2809 break; 2810 case MATAIJ_PRIVATE: 2811 new_local_type = MATSEQAIJ; 2812 bs = 1; 2813 break; 2814 case MATBAIJ_PRIVATE: 2815 new_local_type = MATSEQBAIJ; 2816 break; 2817 case MATSBAIJ_PRIVATE: 2818 new_local_type = MATSEQSBAIJ; 2819 break; 2820 default: 2821 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2822 break; 2823 } 2824 } 2825 2826 /* create MATIS object */ 2827 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2828 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2829 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2830 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2831 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2832 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2833 2834 /* set values */ 2835 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2836 ptr_vals = recv_buffer_vals; 2837 ptr_idxs = recv_buffer_idxs; 2838 for (i=0;i<n_recvs;i++) { 2839 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2840 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2841 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2842 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2845 } 2846 ptr_idxs += olengths_idxs[i]; 2847 ptr_vals += olengths_vals[i]; 2848 } 2849 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2850 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2851 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2852 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 2854 { /* check */ 2855 Vec lvec,rvec; 2856 PetscReal infty_error; 2857 2858 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2859 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2860 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2861 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2862 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2863 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2864 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2865 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2866 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2867 } 2868 2869 /* free workspace */ 2870 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2871 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2872 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2873 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2874 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2875 if (isdense) { 2876 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2877 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2878 } else { 2879 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2880 } 2881 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2883 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2884 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2885 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2886 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2887 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2888 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2889 ierr = PetscFree(onodes);CHKERRQ(ierr); 2890 /* get back new mat */ 2891 *mat_n = new_mat; 2892 PetscFunctionReturn(0); 2893 } 2894 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2897 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2898 { 2899 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2900 PC_IS *pcis = (PC_IS*)pc->data; 2901 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2902 MatNullSpace CoarseNullSpace=NULL; 2903 ISLocalToGlobalMapping coarse_islg; 2904 IS coarse_is; 2905 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2906 PetscInt im_active=-1,active_procs=-1; 2907 PC pc_temp; 2908 PCType coarse_pc_type; 2909 KSPType coarse_ksp_type; 2910 PetscBool multilevel_requested,multilevel_allowed; 2911 PetscBool setsym,issym,isbddc,isnn; 2912 MatStructure matstruct; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 /* Assign global numbering to coarse dofs */ 2917 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2918 2919 /* infer some info from user */ 2920 issym = PETSC_FALSE; 2921 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2922 multilevel_allowed = PETSC_FALSE; 2923 multilevel_requested = PETSC_FALSE; 2924 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2925 if (multilevel_requested) { 2926 /* count "active processes" */ 2927 im_active = !!(pcis->n); 2928 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2929 if (active_procs/pcbddc->coarsening_ratio < 2) { 2930 multilevel_allowed = PETSC_FALSE; 2931 } else { 2932 multilevel_allowed = PETSC_TRUE; 2933 } 2934 } 2935 2936 /* set defaults for coarse KSP and PC */ 2937 if (multilevel_allowed) { 2938 if (issym) { 2939 coarse_ksp_type = KSPRICHARDSON; 2940 } else { 2941 coarse_ksp_type = KSPCHEBYSHEV; 2942 } 2943 coarse_pc_type = PCBDDC; 2944 } else { 2945 coarse_ksp_type = KSPPREONLY; 2946 coarse_pc_type = PCREDUNDANT; 2947 } 2948 2949 /* create the coarse KSP object only once with defaults */ 2950 if (!pcbddc->coarse_ksp) { 2951 char prefix[256],str_level[3]; 2952 size_t len; 2953 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2954 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2955 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2956 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2957 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2958 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2959 /* prefix */ 2960 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2961 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2962 if (!pcbddc->current_level) { 2963 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2964 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2965 } else { 2966 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2967 if (pcbddc->current_level>1) len -= 2; 2968 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2969 *(prefix+len)='\0'; 2970 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2971 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 2972 } 2973 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 2974 } 2975 /* allow user customization */ 2976 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2977 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2978 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 2979 2980 /* get some info after set from options */ 2981 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2982 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 2983 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2984 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 2985 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2986 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2987 isbddc = PETSC_FALSE; 2988 } 2989 2990 /* propagate BDDC info to the next level */ 2991 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2992 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2993 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2994 2995 /* creates temporary MATIS object for coarse matrix */ 2996 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2997 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2998 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2999 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3000 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3001 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3003 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3004 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3005 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3006 3007 /* assemble coarse matrix */ 3008 if (isbddc || isnn) { 3009 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3010 } else { 3011 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3012 } 3013 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3014 3015 /* create local to global scatters for coarse problem */ 3016 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3017 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3018 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3019 3020 /* propagate symmetry info to coarse matrix */ 3021 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3022 3023 /* Compute coarse null space (special handling by BDDC only) */ 3024 if (pcbddc->NullSpace) { 3025 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3026 if (isbddc) { 3027 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3028 } else { 3029 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3030 } 3031 } 3032 3033 /* set operators */ 3034 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3035 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3036 3037 /* additional KSP customization */ 3038 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3039 if (max_it < 5) { 3040 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3041 } 3042 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3043 3044 3045 /* print some info if requested */ 3046 if (pcbddc->dbg_flag) { 3047 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3048 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3049 if (!multilevel_allowed) { 3050 if (multilevel_requested) { 3051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3052 } else if (pcbddc->max_levels) { 3053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3054 } 3055 } 3056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3058 } 3059 3060 /* setup coarse ksp */ 3061 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3062 if (pcbddc->dbg_flag) { 3063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3064 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3065 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3066 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3067 } 3068 3069 /* Check coarse problem if requested */ 3070 if (pcbddc->dbg_flag) { 3071 KSP check_ksp; 3072 KSPType check_ksp_type; 3073 PC check_pc; 3074 Vec check_vec; 3075 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3076 PetscInt its; 3077 PetscBool ispreonly,compute; 3078 3079 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3080 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3081 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3082 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3083 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3084 if (ispreonly) { 3085 check_ksp_type = KSPPREONLY; 3086 compute = PETSC_FALSE; 3087 } else { 3088 if (issym) check_ksp_type = KSPCG; 3089 else check_ksp_type = KSPGMRES; 3090 compute = PETSC_TRUE; 3091 } 3092 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3093 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3094 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3095 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3096 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3097 /* create random vec */ 3098 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3099 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3100 if (CoarseNullSpace) { 3101 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3102 } 3103 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3104 /* solve coarse problem */ 3105 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3106 if (CoarseNullSpace) { 3107 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3108 } 3109 /* check coarse problem residual error */ 3110 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3111 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3112 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3113 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3114 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3118 /* get eigenvalue estimation if preonly has not been requested */ 3119 if (!ispreonly) { 3120 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3121 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3122 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3123 } 3124 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3125 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3126 } 3127 /* free memory */ 3128 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3129 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3135 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3136 { 3137 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3138 PC_IS* pcis = (PC_IS*)pc->data; 3139 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3140 PetscInt i,j,coarse_size; 3141 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3142 PetscErrorCode ierr; 3143 3144 PetscFunctionBegin; 3145 /* get indices in local ordering for vertices and constraints */ 3146 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3147 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3148 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3149 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3150 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3151 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3152 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3153 3154 /* Compute global number of coarse dofs */ 3155 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3156 3157 /* check numbering */ 3158 if (pcbddc->dbg_flag) { 3159 PetscScalar coarsesum,*array; 3160 3161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3164 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3165 for (i=0;i<pcbddc->local_primal_size;i++) { 3166 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3167 } 3168 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3169 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3170 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3171 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3172 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3173 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3175 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3176 for (i=0;i<pcis->n;i++) { 3177 if (array[i] == 1.0) { 3178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3179 } 3180 } 3181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3182 for (i=0;i<pcis->n;i++) { 3183 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3184 } 3185 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3186 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3187 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3188 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3189 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3191 if (pcbddc->dbg_flag > 1) { 3192 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3194 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3195 for (i=0;i<pcbddc->local_primal_size;i++) { 3196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3197 } 3198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3199 } 3200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3201 } 3202 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3203 /* get back data */ 3204 *coarse_size_n = coarse_size; 3205 *local_primal_indices_n = local_primal_indices; 3206 PetscFunctionReturn(0); 3207 } 3208 3209