1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1040 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1044 } 1045 1046 /* DIRICHLET PROBLEM */ 1047 /* Matrix for Dirichlet problem is pcis->A_II */ 1048 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1049 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1050 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1051 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1052 /* default */ 1053 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1054 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1055 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1056 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1057 if (issbaij) { 1058 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1059 } else { 1060 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1061 } 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1065 } 1066 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1067 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1068 if (!n_D) { 1069 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1070 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1071 } 1072 /* Set Up KSP for Dirichlet problem of BDDC */ 1073 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1074 /* set ksp_D into pcis data */ 1075 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1077 pcis->ksp_D = pcbddc->ksp_D; 1078 1079 /* NEUMANN PROBLEM */ 1080 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1081 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1082 if (pcbddc->ksp_R) { /* already created ksp */ 1083 PetscInt nn_R; 1084 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1085 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1086 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1087 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1088 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1092 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1093 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1094 reuse = MAT_INITIAL_MATRIX; 1095 } else { /* safe to reuse the matrix */ 1096 reuse = MAT_REUSE_MATRIX; 1097 } 1098 } 1099 /* last check */ 1100 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 } else { /* first time, so we need to create the matrix */ 1105 reuse = MAT_INITIAL_MATRIX; 1106 } 1107 /* extract A_RR */ 1108 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1109 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1110 if (ibs != mbs) { 1111 Mat newmat; 1112 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1113 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1115 } else { 1116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1117 } 1118 if (!pcbddc->ksp_R) { /* create object if not present */ 1119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1121 /* default */ 1122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1126 if (issbaij) { 1127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1128 } else { 1129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1130 } 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1137 if (!n_R) { 1138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1140 } 1141 /* Set Up KSP for Neumann problem of BDDC */ 1142 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1143 1144 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1145 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1146 /* Dirichlet */ 1147 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1148 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1152 /* need to be adapted? */ 1153 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1154 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1155 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1156 /* print info */ 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1171 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1175 /* need to be adapted? */ 1176 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1177 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1178 /* print info */ 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1185 } 1186 } 1187 /* free Neumann problem's matrix */ 1188 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1194 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1195 { 1196 PetscErrorCode ierr; 1197 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1198 1199 PetscFunctionBegin; 1200 if (applytranspose) { 1201 if (pcbddc->local_auxmat1) { 1202 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1203 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1204 } 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 } else { 1207 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1210 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1219 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1220 { 1221 PetscErrorCode ierr; 1222 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1223 PC_IS* pcis = (PC_IS*) (pc->data); 1224 const PetscScalar zero = 0.0; 1225 1226 PetscFunctionBegin; 1227 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1228 if (applytranspose) { 1229 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1231 } else { 1232 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1234 } 1235 /* start communications from local primal nodes to rhs of coarse solver */ 1236 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1241 /* TODO remove null space when doing multilevel */ 1242 if (pcbddc->coarse_ksp) { 1243 if (applytranspose) { 1244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } else { 1246 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } 1248 } 1249 /* start communications from coarse solver solution to local primal nodes */ 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Local solution on R nodes */ 1253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 } 1260 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1261 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 if (pcbddc->switch_static) { 1265 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 } 1268 1269 /* complete communications from coarse sol to local primal nodes */ 1270 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 PetscScalar *array,*array2; 1291 Vec from,to; 1292 1293 PetscFunctionBegin; 1294 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1295 from = pcbddc->coarse_vec; 1296 to = pcbddc->vec1_P; 1297 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1298 Vec tvec; 1299 PetscInt lsize; 1300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1302 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1303 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1304 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1307 } 1308 } else { /* from local to global -> put data in coarse right hand side */ 1309 from = pcbddc->vec1_P; 1310 to = pcbddc->coarse_vec; 1311 } 1312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1319 { 1320 PetscErrorCode ierr; 1321 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1322 PetscScalar *array,*array2; 1323 Vec from,to; 1324 1325 PetscFunctionBegin; 1326 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1327 from = pcbddc->coarse_vec; 1328 to = pcbddc->vec1_P; 1329 } else { /* from local to global -> put data in coarse right hand side */ 1330 from = pcbddc->vec1_P; 1331 to = pcbddc->coarse_vec; 1332 } 1333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1334 if (smode == SCATTER_FORWARD) { 1335 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1336 Vec tvec; 1337 PetscInt lsize; 1338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1339 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1341 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1343 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1344 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1345 } 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* uncomment for testing purposes */ 1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1355 { 1356 PetscErrorCode ierr; 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1360 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1361 MatType impMatType=MATSEQAIJ; 1362 /* one and zero */ 1363 PetscScalar one=1.0,zero=0.0; 1364 /* space to store constraints and their local indices */ 1365 PetscScalar *temp_quadrature_constraint; 1366 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1367 /* iterators */ 1368 PetscInt i,j,k,total_counts,temp_start_ptr; 1369 /* stuff to store connected components stored in pcbddc->mat_graph */ 1370 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1371 PetscInt n_ISForFaces,n_ISForEdges; 1372 /* near null space stuff */ 1373 MatNullSpace nearnullsp; 1374 const Vec *nearnullvecs; 1375 Vec *localnearnullsp; 1376 PetscBool nnsp_has_cnst; 1377 PetscInt nnsp_size; 1378 PetscScalar *array; 1379 /* BLAS integers */ 1380 PetscBLASInt lwork,lierr; 1381 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1382 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1383 /* LAPACK working arrays for SVD or POD */ 1384 PetscBool skip_lapack; 1385 PetscScalar *work; 1386 PetscReal *singular_vals; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscReal *rwork; 1389 #endif 1390 #if defined(PETSC_MISSING_LAPACK_GESVD) 1391 PetscBLASInt Blas_one_2=1; 1392 PetscScalar *temp_basis,*correlation_mat; 1393 #else 1394 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1395 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1396 #endif 1397 /* reuse */ 1398 PetscInt olocal_primal_size; 1399 PetscInt *oprimal_indices_local_idxs; 1400 /* change of basis */ 1401 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1402 PetscBool boolforchange,qr_needed; 1403 PetscBT touched,change_basis,qr_needed_idx; 1404 /* auxiliary stuff */ 1405 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1406 /* some quantities */ 1407 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1408 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1409 1410 1411 PetscFunctionBegin; 1412 /* Destroy Mat objects computed previously */ 1413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1415 /* Get index sets for faces, edges and vertices from graph */ 1416 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1417 pcbddc->use_vertices = PETSC_TRUE; 1418 } 1419 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1420 /* HACKS (the two following code branches) */ 1421 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1422 pcbddc->use_change_of_basis = PETSC_TRUE; 1423 pcbddc->use_change_on_faces = PETSC_FALSE; 1424 } 1425 if (pcbddc->NullSpace) { 1426 /* use_change_of_basis should be consistent among processors */ 1427 PetscBool tbool = pcbddc->use_change_of_basis; 1428 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1429 } 1430 /* print some info */ 1431 if (pcbddc->dbg_flag) { 1432 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1434 i = 0; 1435 if (ISForVertices) { 1436 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1441 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1442 } 1443 /* check if near null space is attached to global mat */ 1444 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1445 if (nearnullsp) { 1446 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1447 /* remove any stored info */ 1448 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1449 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1450 /* store information for BDDC solver reuse */ 1451 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1452 pcbddc->onearnullspace = nearnullsp; 1453 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1454 for (i=0;i<nnsp_size;i++) { 1455 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1456 } 1457 } else { /* if near null space is not provided BDDC uses constants by default */ 1458 nnsp_size = 0; 1459 nnsp_has_cnst = PETSC_TRUE; 1460 } 1461 /* get max number of constraints on a single cc */ 1462 max_constraints = nnsp_size; 1463 if (nnsp_has_cnst) max_constraints++; 1464 1465 /* 1466 Evaluate maximum storage size needed by the procedure 1467 - temp_indices will contain start index of each constraint stored as follows 1468 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1469 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1470 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1471 */ 1472 total_counts = n_ISForFaces+n_ISForEdges; 1473 total_counts *= max_constraints; 1474 n_vertices = 0; 1475 if (ISForVertices) { 1476 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1477 } 1478 total_counts += n_vertices; 1479 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1480 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1481 total_counts = 0; 1482 max_size_of_constraint = 0; 1483 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1484 if (i<n_ISForEdges) { 1485 used_IS = &ISForEdges[i]; 1486 } else { 1487 used_IS = &ISForFaces[i-n_ISForEdges]; 1488 } 1489 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1490 total_counts += j; 1491 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1492 } 1493 total_counts *= max_constraints; 1494 total_counts += n_vertices; 1495 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1496 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1497 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1498 /* get local part of global near null space vectors */ 1499 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1500 for (k=0;k<nnsp_size;k++) { 1501 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1502 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 } 1505 1506 /* whether or not to skip lapack calls */ 1507 skip_lapack = PETSC_TRUE; 1508 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1509 1510 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1511 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1512 PetscScalar temp_work; 1513 #if defined(PETSC_MISSING_LAPACK_GESVD) 1514 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1515 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1516 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1518 #if defined(PETSC_USE_COMPLEX) 1519 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1520 #endif 1521 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1522 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1523 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1524 lwork = -1; 1525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1526 #if !defined(PETSC_USE_COMPLEX) 1527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1528 #else 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1530 #endif 1531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1533 #else /* on missing GESVD */ 1534 /* SVD */ 1535 PetscInt max_n,min_n; 1536 max_n = max_size_of_constraint; 1537 min_n = max_constraints; 1538 if (max_size_of_constraint < max_constraints) { 1539 min_n = max_size_of_constraint; 1540 max_n = max_constraints; 1541 } 1542 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1543 #if defined(PETSC_USE_COMPLEX) 1544 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1545 #endif 1546 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1547 lwork = -1; 1548 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1556 #endif 1557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1559 #endif /* on missing GESVD */ 1560 /* Allocate optimal workspace */ 1561 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1562 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1563 } 1564 /* Now we can loop on constraining sets */ 1565 total_counts = 0; 1566 temp_indices[0] = 0; 1567 /* vertices */ 1568 if (ISForVertices) { 1569 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1570 if (nnsp_has_cnst) { /* consider all vertices */ 1571 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1572 for (i=0;i<n_vertices;i++) { 1573 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1574 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1575 total_counts++; 1576 } 1577 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1578 PetscBool used_vertex; 1579 for (i=0;i<n_vertices;i++) { 1580 used_vertex = PETSC_FALSE; 1581 k = 0; 1582 while (!used_vertex && k<nnsp_size) { 1583 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1584 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1585 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1586 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1587 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1588 total_counts++; 1589 used_vertex = PETSC_TRUE; 1590 } 1591 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1592 k++; 1593 } 1594 } 1595 } 1596 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 n_vertices = total_counts; 1598 } 1599 1600 /* edges and faces */ 1601 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1602 if (i<n_ISForEdges) { 1603 used_IS = &ISForEdges[i]; 1604 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1605 } else { 1606 used_IS = &ISForFaces[i-n_ISForEdges]; 1607 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1608 } 1609 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1610 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1611 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1612 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1613 /* change of basis should not be performed on local periodic nodes */ 1614 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1615 if (nnsp_has_cnst) { 1616 PetscScalar quad_value; 1617 temp_constraints++; 1618 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1619 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1620 for (j=0;j<size_of_constraint;j++) { 1621 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1622 } 1623 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1624 total_counts++; 1625 } 1626 for (k=0;k<nnsp_size;k++) { 1627 PetscReal real_value; 1628 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1629 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1630 for (j=0;j<size_of_constraint;j++) { 1631 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1632 } 1633 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1634 /* check if array is null on the connected component */ 1635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1636 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1637 if (real_value > 0.0) { /* keep indices and values */ 1638 temp_constraints++; 1639 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1640 total_counts++; 1641 } 1642 } 1643 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1644 valid_constraints = temp_constraints; 1645 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1646 if (!pcbddc->use_nnsp_true && temp_constraints) { 1647 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1648 1649 #if defined(PETSC_MISSING_LAPACK_GESVD) 1650 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1651 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1652 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1653 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1654 from that computed using LAPACKgesvd 1655 -> This is due to a different computation of eigenvectors in LAPACKheev 1656 -> The quality of the POD-computed basis will be the same */ 1657 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1658 /* Store upper triangular part of correlation matrix */ 1659 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1660 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1661 for (j=0;j<temp_constraints;j++) { 1662 for (k=0;k<j+1;k++) { 1663 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1664 } 1665 } 1666 /* compute eigenvalues and eigenvectors of correlation matrix */ 1667 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1668 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1669 #if !defined(PETSC_USE_COMPLEX) 1670 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1671 #else 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1673 #endif 1674 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1675 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1676 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1677 j=0; 1678 while (j < temp_constraints && singular_vals[j] < tol) j++; 1679 total_counts=total_counts-j; 1680 valid_constraints = temp_constraints-j; 1681 /* scale and copy POD basis into used quadrature memory */ 1682 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1683 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1684 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1688 if (j<temp_constraints) { 1689 PetscInt ii; 1690 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1692 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1693 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1694 for (k=0;k<temp_constraints-j;k++) { 1695 for (ii=0;ii<size_of_constraint;ii++) { 1696 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1697 } 1698 } 1699 } 1700 #else /* on missing GESVD */ 1701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1702 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1705 #if !defined(PETSC_USE_COMPLEX) 1706 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1707 #else 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1709 #endif 1710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1711 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1712 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1713 k = temp_constraints; 1714 if (k > size_of_constraint) k = size_of_constraint; 1715 j = 0; 1716 while (j < k && singular_vals[k-j-1] < tol) j++; 1717 total_counts = total_counts-temp_constraints+k-j; 1718 valid_constraints = k-j; 1719 #endif /* on missing GESVD */ 1720 } 1721 /* setting change_of_basis flag is safe now */ 1722 if (boolforchange) { 1723 for (j=0;j<valid_constraints;j++) { 1724 PetscBTSet(change_basis,total_counts-j-1); 1725 } 1726 } 1727 } 1728 /* free index sets of faces, edges and vertices */ 1729 for (i=0;i<n_ISForFaces;i++) { 1730 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1731 } 1732 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1733 for (i=0;i<n_ISForEdges;i++) { 1734 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1735 } 1736 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1737 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1738 /* map temp_indices_to_constraint in boundary numbering */ 1739 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1740 if (i != temp_indices[total_counts]) { 1741 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1742 } 1743 1744 /* free workspace */ 1745 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1746 ierr = PetscFree(work);CHKERRQ(ierr); 1747 #if defined(PETSC_USE_COMPLEX) 1748 ierr = PetscFree(rwork);CHKERRQ(ierr); 1749 #endif 1750 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1751 #if defined(PETSC_MISSING_LAPACK_GESVD) 1752 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1753 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1754 #endif 1755 } 1756 for (k=0;k<nnsp_size;k++) { 1757 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1758 } 1759 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1760 1761 /* set quantities in pcbddc data structure and store previous primal size */ 1762 /* n_vertices defines the number of subdomain corners in the primal space */ 1763 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1764 olocal_primal_size = pcbddc->local_primal_size; 1765 pcbddc->local_primal_size = total_counts; 1766 pcbddc->n_vertices = n_vertices; 1767 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1768 1769 /* Create constraint matrix */ 1770 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1771 /* so we need to set it up properly either with or without change of basis */ 1772 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1773 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1774 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1775 /* array to compute a local numbering of constraints : vertices first then constraints */ 1776 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1777 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1778 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1779 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1780 /* auxiliary stuff for basis change */ 1781 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1782 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1783 1784 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1785 total_primal_vertices=0; 1786 for (i=0;i<pcbddc->local_primal_size;i++) { 1787 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1788 if (size_of_constraint == 1) { 1789 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1790 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1791 aux_primal_minloc[total_primal_vertices]=0; 1792 total_primal_vertices++; 1793 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1794 PetscInt min_loc,min_index; 1795 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1796 /* find first untouched local node */ 1797 k = 0; 1798 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1799 min_index = global_indices[k]; 1800 min_loc = k; 1801 /* search the minimum among global nodes already untouched on the cc */ 1802 for (k=1;k<size_of_constraint;k++) { 1803 /* there can be more than one constraint on a single connected component */ 1804 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1805 min_index = global_indices[k]; 1806 min_loc = k; 1807 } 1808 } 1809 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1810 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1811 aux_primal_minloc[total_primal_vertices]=min_loc; 1812 total_primal_vertices++; 1813 } 1814 } 1815 /* determine if a QR strategy is needed for change of basis */ 1816 qr_needed = PETSC_FALSE; 1817 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1818 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1819 if (PetscBTLookup(change_basis,i)) { 1820 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1821 j = 0; 1822 for (k=0;k<size_of_constraint;k++) { 1823 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1824 j++; 1825 } 1826 } 1827 /* found more than one primal dof on the cc */ 1828 if (j > 1) { 1829 PetscBTSet(qr_needed_idx,i); 1830 qr_needed = PETSC_TRUE; 1831 } 1832 } 1833 } 1834 /* free workspace */ 1835 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1836 1837 /* permute indices in order to have a sorted set of vertices */ 1838 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1839 1840 /* nonzero structure of constraint matrix */ 1841 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1842 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1843 j=total_primal_vertices; 1844 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1845 if (!PetscBTLookup(change_basis,i)) { 1846 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1847 j++; 1848 } 1849 } 1850 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1851 ierr = PetscFree(nnz);CHKERRQ(ierr); 1852 /* set values in constraint matrix */ 1853 for (i=0;i<total_primal_vertices;i++) { 1854 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1855 } 1856 total_counts = total_primal_vertices; 1857 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1858 if (!PetscBTLookup(change_basis,i)) { 1859 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1860 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1861 total_counts++; 1862 } 1863 } 1864 /* assembling */ 1865 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1866 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 /* 1868 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1869 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1870 */ 1871 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1872 if (pcbddc->use_change_of_basis) { 1873 /* dual and primal dofs on a single cc */ 1874 PetscInt dual_dofs,primal_dofs; 1875 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1876 PetscInt primal_counter; 1877 /* working stuff for GEQRF */ 1878 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1879 PetscBLASInt lqr_work; 1880 /* working stuff for UNGQR */ 1881 PetscScalar *gqr_work,lgqr_work_t; 1882 PetscBLASInt lgqr_work; 1883 /* working stuff for TRTRS */ 1884 PetscScalar *trs_rhs; 1885 PetscBLASInt Blas_NRHS; 1886 /* pointers for values insertion into change of basis matrix */ 1887 PetscInt *start_rows,*start_cols; 1888 PetscScalar *start_vals; 1889 /* working stuff for values insertion */ 1890 PetscBT is_primal; 1891 1892 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1893 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1894 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1895 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1896 /* work arrays */ 1897 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1898 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1899 /* nonzeros per row */ 1900 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1901 if (PetscBTLookup(change_basis,i)) { 1902 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1903 if (PetscBTLookup(qr_needed_idx,i)) { 1904 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1905 } else { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1907 /* get local primal index on the cc */ 1908 j = 0; 1909 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1910 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1911 } 1912 } 1913 } 1914 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1915 ierr = PetscFree(nnz);CHKERRQ(ierr); 1916 /* Set initial identity in the matrix */ 1917 for (i=0;i<pcis->n_B;i++) { 1918 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1919 } 1920 1921 if (pcbddc->dbg_flag) { 1922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1924 } 1925 1926 1927 /* Now we loop on the constraints which need a change of basis */ 1928 /* 1929 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1930 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1931 1932 Basic blocks of change of basis matrix T computed 1933 1934 - Using the following block transformation if there is only a primal dof on the cc 1935 (in the example, primal dof is the last one of the edge in LOCAL ordering 1936 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1937 | 1 0 ... 0 1 | 1938 | 0 1 ... 0 1 | 1939 | ... | 1940 | 0 ... 1 1 | 1941 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1942 1943 - via QR decomposition of constraints otherwise 1944 */ 1945 if (qr_needed) { 1946 /* space to store Q */ 1947 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1948 /* first we issue queries for optimal work */ 1949 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1952 lqr_work = -1; 1953 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1955 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1956 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1957 lgqr_work = -1; 1958 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1962 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1963 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1965 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1966 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1967 /* array to store scaling factors for reflectors */ 1968 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1969 /* array to store rhs and solution of triangular solver */ 1970 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1971 /* allocating workspace for check */ 1972 if (pcbddc->dbg_flag) { 1973 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1974 } 1975 } 1976 /* array to store whether a node is primal or not */ 1977 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1979 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1980 if (i != total_primal_vertices) { 1981 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1982 } 1983 for (i=0;i<total_primal_vertices;i++) { 1984 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1985 } 1986 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1987 1988 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1989 /* -> using implicit ordering contained in temp_indices data */ 1990 total_counts = pcbddc->n_vertices; 1991 primal_counter = total_counts; 1992 while (total_counts<pcbddc->local_primal_size) { 1993 primal_dofs = 1; 1994 if (PetscBTLookup(change_basis,total_counts)) { 1995 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1996 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1997 primal_dofs++; 1998 } 1999 /* get constraint info */ 2000 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2001 dual_dofs = size_of_constraint-primal_dofs; 2002 2003 if (pcbddc->dbg_flag) { 2004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2005 } 2006 2007 if (primal_dofs > 1) { /* QR */ 2008 2009 /* copy quadrature constraints for change of basis check */ 2010 if (pcbddc->dbg_flag) { 2011 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2012 } 2013 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2014 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2015 2016 /* compute QR decomposition of constraints */ 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2018 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2021 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2022 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2023 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2024 2025 /* explictly compute R^-T */ 2026 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2027 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2028 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2029 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2032 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2033 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2034 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2035 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2036 2037 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2043 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2044 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2045 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2046 2047 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2048 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2049 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2051 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2056 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2057 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2058 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2059 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2060 2061 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2062 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2063 /* insert cols for primal dofs */ 2064 for (j=0;j<primal_dofs;j++) { 2065 start_vals = &qr_basis[j*size_of_constraint]; 2066 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2067 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2068 } 2069 /* insert cols for dual dofs */ 2070 for (j=0,k=0;j<dual_dofs;k++) { 2071 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2072 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2073 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2074 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2075 j++; 2076 } 2077 } 2078 2079 /* check change of basis */ 2080 if (pcbddc->dbg_flag) { 2081 PetscInt ii,jj; 2082 PetscBool valid_qr=PETSC_TRUE; 2083 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2090 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2091 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2092 for (jj=0;jj<size_of_constraint;jj++) { 2093 for (ii=0;ii<primal_dofs;ii++) { 2094 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2095 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2096 } 2097 } 2098 if (!valid_qr) { 2099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2100 for (jj=0;jj<size_of_constraint;jj++) { 2101 for (ii=0;ii<primal_dofs;ii++) { 2102 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2103 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2104 } 2105 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2106 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2107 } 2108 } 2109 } 2110 } else { 2111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2112 } 2113 } 2114 } else { /* simple transformation block */ 2115 PetscInt row,col; 2116 PetscScalar val; 2117 for (j=0;j<size_of_constraint;j++) { 2118 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2119 if (!PetscBTLookup(is_primal,row)) { 2120 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2121 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2122 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2123 } else { 2124 for (k=0;k<size_of_constraint;k++) { 2125 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2126 if (row != col) { 2127 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2128 } else { 2129 val = 1.0; 2130 } 2131 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2132 } 2133 } 2134 } 2135 if (pcbddc->dbg_flag) { 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2137 } 2138 } 2139 /* increment primal counter */ 2140 primal_counter += primal_dofs; 2141 } else { 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment constraint counter total_counts */ 2147 total_counts += primal_dofs; 2148 } 2149 2150 /* free workspace */ 2151 if (qr_needed) { 2152 if (pcbddc->dbg_flag) { 2153 ierr = PetscFree(work);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2160 } 2161 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2162 /* assembling */ 2163 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 /* 2166 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2167 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2168 */ 2169 } 2170 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2171 if (pcbddc->user_ChangeOfBasisMatrix) { 2172 PetscInt rows,cols; 2173 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2174 if (rows == pcis->n && cols == pcis->n) { 2175 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2176 } else { 2177 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2178 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2179 } 2180 } 2181 2182 /* get indices in local ordering for vertices and constraints */ 2183 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2184 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2185 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2186 } 2187 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2188 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2189 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2190 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2191 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2192 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2193 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2194 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2195 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2196 /* set quantities in PCBDDC data struct */ 2197 pcbddc->n_actual_vertices = i; 2198 /* check if a new primal space has been introduced */ 2199 pcbddc->new_primal_space_local = PETSC_TRUE; 2200 if (olocal_primal_size == pcbddc->local_primal_size) { 2201 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2202 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2203 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2204 } 2205 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2206 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2207 2208 /* flush dbg viewer */ 2209 if (pcbddc->dbg_flag) { 2210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2211 } 2212 2213 /* free workspace */ 2214 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2215 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2216 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2217 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2218 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2219 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2220 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2221 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 #undef __FUNCT__ 2226 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2227 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2228 { 2229 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2230 PC_IS *pcis = (PC_IS*)pc->data; 2231 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2232 PetscInt ierr,i,vertex_size; 2233 PetscViewer viewer=pcbddc->dbg_viewer; 2234 2235 PetscFunctionBegin; 2236 /* Reset previously computed graph */ 2237 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2238 /* Init local Graph struct */ 2239 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2240 2241 /* Check validity of the csr graph passed in by the user */ 2242 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2243 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2244 } 2245 2246 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2247 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2248 Mat mat_adj; 2249 const PetscInt *xadj,*adjncy; 2250 PetscBool flg_row=PETSC_TRUE; 2251 2252 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2253 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2254 if (!flg_row) { 2255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2256 } 2257 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2258 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2259 if (!flg_row) { 2260 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2261 } 2262 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2263 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2264 } 2265 2266 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2267 vertex_size = 1; 2268 if (pcbddc->user_provided_isfordofs) { 2269 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2270 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2271 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2272 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2273 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2274 } 2275 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2276 pcbddc->n_ISForDofs = 0; 2277 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2278 } 2279 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2280 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2281 } else { 2282 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2283 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2284 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2285 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2286 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2287 } 2288 } 2289 } 2290 2291 /* Setup of Graph */ 2292 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2293 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2294 } 2295 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2296 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2297 } 2298 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2299 2300 /* Graph's connected components analysis */ 2301 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2302 2303 /* print some info to stdout */ 2304 if (pcbddc->dbg_flag) { 2305 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2306 } 2307 2308 /* mark topography has done */ 2309 pcbddc->recompute_topography = PETSC_FALSE; 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2315 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2316 { 2317 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2318 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2319 PetscErrorCode ierr; 2320 2321 PetscFunctionBegin; 2322 n = 0; 2323 vertices = 0; 2324 if (pcbddc->ConstraintMatrix) { 2325 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2326 for (i=0;i<local_primal_size;i++) { 2327 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2328 if (size_of_constraint == 1) n++; 2329 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2330 } 2331 if (vertices_idx) { 2332 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2333 n = 0; 2334 for (i=0;i<local_primal_size;i++) { 2335 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2336 if (size_of_constraint == 1) { 2337 vertices[n++]=row_cmat_indices[0]; 2338 } 2339 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2340 } 2341 } 2342 } 2343 *n_vertices = n; 2344 if (vertices_idx) *vertices_idx = vertices; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2350 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2351 { 2352 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2353 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2354 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2355 PetscBT touched; 2356 PetscErrorCode ierr; 2357 2358 /* This function assumes that the number of local constraints per connected component 2359 is not greater than the number of nodes defined for the connected component 2360 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2361 PetscFunctionBegin; 2362 n = 0; 2363 constraints_index = 0; 2364 if (pcbddc->ConstraintMatrix) { 2365 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2366 max_size_of_constraint = 0; 2367 for (i=0;i<local_primal_size;i++) { 2368 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2369 if (size_of_constraint > 1) { 2370 n++; 2371 } 2372 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2373 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2374 } 2375 if (constraints_idx) { 2376 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2377 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2378 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2379 n = 0; 2380 for (i=0;i<local_primal_size;i++) { 2381 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2382 if (size_of_constraint > 1) { 2383 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2384 /* find first untouched local node */ 2385 j = 0; 2386 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2387 min_index = row_cmat_global_indices[j]; 2388 min_loc = j; 2389 /* search the minimum among nodes not yet touched on the connected component 2390 since there can be more than one constraint on a single cc */ 2391 for (j=1;j<size_of_constraint;j++) { 2392 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2393 min_index = row_cmat_global_indices[j]; 2394 min_loc = j; 2395 } 2396 } 2397 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2398 constraints_index[n++] = row_cmat_indices[min_loc]; 2399 } 2400 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2401 } 2402 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2403 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2404 } 2405 } 2406 *n_constraints = n; 2407 if (constraints_idx) *constraints_idx = constraints_index; 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /* the next two functions has been adapted from pcis.c */ 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "PCBDDCApplySchur" 2414 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2415 { 2416 PetscErrorCode ierr; 2417 PC_IS *pcis = (PC_IS*)(pc->data); 2418 2419 PetscFunctionBegin; 2420 if (!vec2_B) { vec2_B = v; } 2421 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2422 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2423 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2424 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2425 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2431 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2432 { 2433 PetscErrorCode ierr; 2434 PC_IS *pcis = (PC_IS*)(pc->data); 2435 2436 PetscFunctionBegin; 2437 if (!vec2_B) { vec2_B = v; } 2438 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2439 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2440 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2441 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2442 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "PCBDDCSubsetNumbering" 2448 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2449 { 2450 Vec local_vec,global_vec; 2451 IS seqis,paris; 2452 VecScatter scatter_ctx; 2453 PetscScalar *array; 2454 PetscInt *temp_global_dofs; 2455 PetscScalar globalsum; 2456 PetscInt i,j,s; 2457 PetscInt nlocals,first_index,old_index,max_local; 2458 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2459 PetscMPIInt *dof_sizes,*dof_displs; 2460 PetscBool first_found; 2461 PetscErrorCode ierr; 2462 2463 PetscFunctionBegin; 2464 /* mpi buffers */ 2465 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2466 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2467 j = ( !rank_prec_comm ? size_prec_comm : 0); 2468 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2469 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2470 /* get maximum size of subset */ 2471 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2472 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2473 max_local = 0; 2474 for (i=0;i<n_local_dofs;i++) { 2475 if (max_local < temp_global_dofs[i] ) { 2476 max_local = temp_global_dofs[i]; 2477 } 2478 } 2479 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2480 max_global++; 2481 max_local = 0; 2482 for (i=0;i<n_local_dofs;i++) { 2483 if (max_local < local_dofs[i] ) { 2484 max_local = local_dofs[i]; 2485 } 2486 } 2487 max_local++; 2488 /* allocate workspace */ 2489 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2490 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2491 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2492 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2493 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2494 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2495 /* create scatter */ 2496 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2497 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2498 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2499 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2500 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2501 /* init array */ 2502 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2503 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2504 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2505 if (local_dofs_mult) { 2506 for (i=0;i<n_local_dofs;i++) { 2507 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2508 } 2509 } else { 2510 for (i=0;i<n_local_dofs;i++) { 2511 array[local_dofs[i]]=1.0; 2512 } 2513 } 2514 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2515 /* scatter into global vec and get total number of global dofs */ 2516 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2519 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2520 /* Fill global_vec with cumulative function for global numbering */ 2521 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2522 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2523 nlocals = 0; 2524 first_index = -1; 2525 first_found = PETSC_FALSE; 2526 for (i=0;i<s;i++) { 2527 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2528 first_found = PETSC_TRUE; 2529 first_index = i; 2530 } 2531 nlocals += (PetscInt)PetscRealPart(array[i]); 2532 } 2533 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2534 if (!rank_prec_comm) { 2535 dof_displs[0]=0; 2536 for (i=1;i<size_prec_comm;i++) { 2537 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2538 } 2539 } 2540 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2541 if (first_found) { 2542 array[first_index] += (PetscScalar)nlocals; 2543 old_index = first_index; 2544 for (i=first_index+1;i<s;i++) { 2545 if (PetscRealPart(array[i]) > 0.1) { 2546 array[i] += array[old_index]; 2547 old_index = i; 2548 } 2549 } 2550 } 2551 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2552 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2553 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2554 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2555 /* get global ordering of local dofs */ 2556 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2557 if (local_dofs_mult) { 2558 for (i=0;i<n_local_dofs;i++) { 2559 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2560 } 2561 } else { 2562 for (i=0;i<n_local_dofs;i++) { 2563 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2564 } 2565 } 2566 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2567 /* free workspace */ 2568 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2569 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2570 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2571 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2572 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2573 /* return pointer to global ordering of local dofs */ 2574 *global_numbering_subset = temp_global_dofs; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2580 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2581 { 2582 PetscInt i,j; 2583 PetscScalar *alphas; 2584 PetscErrorCode ierr; 2585 2586 PetscFunctionBegin; 2587 /* this implements stabilized Gram-Schmidt */ 2588 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2589 for (i=0;i<n;i++) { 2590 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2591 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2592 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2593 } 2594 ierr = PetscFree(alphas);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatISGetSubassemblingPattern" 2600 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2601 { 2602 Mat subdomain_adj; 2603 IS new_ranks,ranks_send_to; 2604 MatPartitioning partitioner; 2605 Mat_IS *matis; 2606 PetscInt n_neighs,*neighs,*n_shared,**shared; 2607 PetscInt prank; 2608 PetscMPIInt size,rank,color; 2609 PetscInt *xadj,*adjncy,*oldranks; 2610 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2611 PetscInt i,j,local_size,threshold=0; 2612 PetscErrorCode ierr; 2613 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2614 PetscSubcomm subcomm; 2615 2616 PetscFunctionBegin; 2617 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2618 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2619 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2620 2621 /* Get info on mapping */ 2622 matis = (Mat_IS*)(mat->data); 2623 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2624 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2625 2626 /* build local CSR graph of subdomains' connectivity */ 2627 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2628 xadj[0] = 0; 2629 xadj[1] = PetscMax(n_neighs-1,0); 2630 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2631 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2632 2633 if (threshold) { 2634 PetscInt* count,min_threshold; 2635 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2636 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2637 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2638 for (j=0;j<n_shared[i];j++) { 2639 count[shared[i][j]] += 1; 2640 } 2641 } 2642 /* adapt threshold since we dont want to lose significant connections */ 2643 min_threshold = n_neighs; 2644 for (i=1;i<n_neighs;i++) { 2645 for (j=0;j<n_shared[i];j++) { 2646 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2647 } 2648 } 2649 threshold = PetscMax(min_threshold+1,threshold); 2650 xadj[1] = 0; 2651 for (i=1;i<n_neighs;i++) { 2652 for (j=0;j<n_shared[i];j++) { 2653 if (count[shared[i][j]] < threshold) { 2654 adjncy[xadj[1]] = neighs[i]; 2655 adjncy_wgt[xadj[1]] = n_shared[i]; 2656 xadj[1]++; 2657 break; 2658 } 2659 } 2660 } 2661 ierr = PetscFree(count);CHKERRQ(ierr); 2662 } else { 2663 if (xadj[1]) { 2664 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2665 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2666 } 2667 } 2668 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2669 if (use_square) { 2670 for (i=0;i<xadj[1];i++) { 2671 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2672 } 2673 } 2674 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2675 2676 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2677 2678 /* 2679 Restrict work on active processes only. 2680 */ 2681 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2682 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2683 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2684 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2685 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2686 if (color) { 2687 ierr = PetscFree(xadj);CHKERRQ(ierr); 2688 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2689 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2690 } else { 2691 PetscInt coarsening_ratio; 2692 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2693 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2694 prank = rank; 2695 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2696 /* 2697 for (i=0;i<size;i++) { 2698 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2699 } 2700 */ 2701 for (i=0;i<xadj[1];i++) { 2702 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2703 } 2704 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2705 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2706 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2707 2708 /* Partition */ 2709 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2710 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2711 if (use_vwgt) { 2712 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2713 v_wgt[0] = local_size; 2714 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2715 } 2716 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2717 coarsening_ratio = size/n_subdomains; 2718 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2719 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2720 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2721 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2722 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2723 2724 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2725 if (contiguous) { 2726 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2727 } else { 2728 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2729 } 2730 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2731 /* clean up */ 2732 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2733 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2734 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2735 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2736 } 2737 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2738 2739 /* assemble parallel IS for sends */ 2740 i = 1; 2741 if (color) i=0; 2742 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2743 2744 /* get back IS */ 2745 *is_sends = ranks_send_to; 2746 PetscFunctionReturn(0); 2747 } 2748 2749 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatISSubassemble" 2753 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2754 { 2755 Mat local_mat; 2756 Mat_IS *matis; 2757 IS is_sends_internal; 2758 PetscInt rows,cols; 2759 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2760 PetscBool ismatis,isdense,destroy_mat; 2761 ISLocalToGlobalMapping l2gmap; 2762 PetscInt* l2gmap_indices; 2763 const PetscInt* is_indices; 2764 MatType new_local_type; 2765 /* buffers */ 2766 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2767 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2768 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2769 /* MPI */ 2770 MPI_Comm comm,comm_n; 2771 PetscSubcomm subcomm; 2772 PetscMPIInt n_sends,n_recvs,commsize; 2773 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2774 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2775 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2776 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2777 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 /* TODO: add missing checks */ 2782 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2783 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2784 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2785 PetscValidLogicalCollectiveInt(mat,nis,7); 2786 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2787 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2788 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2789 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2790 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2791 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2792 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2793 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2794 PetscInt mrows,mcols,mnrows,mncols; 2795 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2796 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2797 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2798 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2799 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2800 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2801 } 2802 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2803 PetscValidLogicalCollectiveInt(mat,bs,0); 2804 /* prepare IS for sending if not provided */ 2805 if (!is_sends) { 2806 PetscBool pcontig = PETSC_TRUE; 2807 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2808 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2809 } else { 2810 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2811 is_sends_internal = is_sends; 2812 } 2813 2814 /* get pointer of MATIS data */ 2815 matis = (Mat_IS*)mat->data; 2816 2817 /* get comm */ 2818 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2819 2820 /* compute number of sends */ 2821 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2822 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2823 2824 /* compute number of receives */ 2825 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2826 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2827 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2828 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2829 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2830 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2831 ierr = PetscFree(iflags);CHKERRQ(ierr); 2832 2833 /* restrict comm if requested */ 2834 subcomm = 0; 2835 destroy_mat = PETSC_FALSE; 2836 if (restrict_comm) { 2837 PetscMPIInt color,rank,subcommsize; 2838 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2839 color = 0; 2840 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2841 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2842 subcommsize = commsize - subcommsize; 2843 /* check if reuse has been requested */ 2844 if (reuse == MAT_REUSE_MATRIX) { 2845 if (*mat_n) { 2846 PetscMPIInt subcommsize2; 2847 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2848 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2849 comm_n = PetscObjectComm((PetscObject)*mat_n); 2850 } else { 2851 comm_n = PETSC_COMM_SELF; 2852 } 2853 } else { /* MAT_INITIAL_MATRIX */ 2854 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 2855 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 2856 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2857 comm_n = subcomm->comm; 2858 } 2859 /* flag to destroy *mat_n if not significative */ 2860 if (color) destroy_mat = PETSC_TRUE; 2861 } else { 2862 comm_n = comm; 2863 } 2864 2865 /* prepare send/receive buffers */ 2866 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2867 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2868 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2869 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2870 if (nis) { 2871 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 2872 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 2873 } 2874 2875 /* Get data from local matrices */ 2876 if (!isdense) { 2877 /* TODO: See below some guidelines on how to prepare the local buffers */ 2878 /* 2879 send_buffer_vals should contain the raw values of the local matrix 2880 send_buffer_idxs should contain: 2881 - MatType_PRIVATE type 2882 - PetscInt size_of_l2gmap 2883 - PetscInt global_row_indices[size_of_l2gmap] 2884 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2885 */ 2886 } else { 2887 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2888 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2889 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2890 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2891 send_buffer_idxs[1] = i; 2892 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2893 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2894 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2895 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2896 for (i=0;i<n_sends;i++) { 2897 ilengths_vals[is_indices[i]] = len*len; 2898 ilengths_idxs[is_indices[i]] = len+2; 2899 } 2900 } 2901 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2902 /* additional is (if any) */ 2903 if (nis) { 2904 PetscMPIInt psum; 2905 PetscInt j; 2906 for (j=0,psum=0;j<nis;j++) { 2907 PetscInt plen; 2908 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2909 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 2910 psum += len+1; /* indices + lenght */ 2911 } 2912 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 2913 for (j=0,psum=0;j<nis;j++) { 2914 PetscInt plen; 2915 const PetscInt *is_array_idxs; 2916 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2917 send_buffer_idxs_is[psum] = plen; 2918 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2919 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 2920 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2921 psum += plen+1; /* indices + lenght */ 2922 } 2923 for (i=0;i<n_sends;i++) { 2924 ilengths_idxs_is[is_indices[i]] = psum; 2925 } 2926 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 2927 } 2928 2929 buf_size_idxs = 0; 2930 buf_size_vals = 0; 2931 buf_size_idxs_is = 0; 2932 for (i=0;i<n_recvs;i++) { 2933 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2934 buf_size_vals += (PetscInt)olengths_vals[i]; 2935 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 2936 } 2937 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2938 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2939 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 2940 2941 /* get new tags for clean communications */ 2942 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2943 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2944 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 2945 2946 /* allocate for requests */ 2947 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2948 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2949 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 2950 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2952 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 2953 2954 /* communications */ 2955 ptr_idxs = recv_buffer_idxs; 2956 ptr_vals = recv_buffer_vals; 2957 ptr_idxs_is = recv_buffer_idxs_is; 2958 for (i=0;i<n_recvs;i++) { 2959 source_dest = onodes[i]; 2960 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2961 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2962 ptr_idxs += olengths_idxs[i]; 2963 ptr_vals += olengths_vals[i]; 2964 if (nis) { 2965 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 2966 ptr_idxs_is += olengths_idxs_is[i]; 2967 } 2968 } 2969 for (i=0;i<n_sends;i++) { 2970 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2971 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2972 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2973 if (nis) { 2974 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 2975 } 2976 } 2977 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2978 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2979 2980 /* assemble new l2g map */ 2981 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2982 ptr_idxs = recv_buffer_idxs; 2983 buf_size_idxs = 0; 2984 for (i=0;i<n_recvs;i++) { 2985 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2986 ptr_idxs += olengths_idxs[i]; 2987 } 2988 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 2989 ptr_idxs = recv_buffer_idxs; 2990 buf_size_idxs = 0; 2991 for (i=0;i<n_recvs;i++) { 2992 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2993 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2994 ptr_idxs += olengths_idxs[i]; 2995 } 2996 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2997 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2998 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2999 3000 /* infer new local matrix type from received local matrices type */ 3001 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3002 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3003 if (n_recvs) { 3004 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3005 ptr_idxs = recv_buffer_idxs; 3006 for (i=0;i<n_recvs;i++) { 3007 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3008 new_local_type_private = MATAIJ_PRIVATE; 3009 break; 3010 } 3011 ptr_idxs += olengths_idxs[i]; 3012 } 3013 switch (new_local_type_private) { 3014 case MATDENSE_PRIVATE: 3015 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3016 new_local_type = MATSEQAIJ; 3017 bs = 1; 3018 } else { /* if I receive only 1 dense matrix */ 3019 new_local_type = MATSEQDENSE; 3020 bs = 1; 3021 } 3022 break; 3023 case MATAIJ_PRIVATE: 3024 new_local_type = MATSEQAIJ; 3025 bs = 1; 3026 break; 3027 case MATBAIJ_PRIVATE: 3028 new_local_type = MATSEQBAIJ; 3029 break; 3030 case MATSBAIJ_PRIVATE: 3031 new_local_type = MATSEQSBAIJ; 3032 break; 3033 default: 3034 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3035 break; 3036 } 3037 } else { /* by default, new_local_type is seqdense */ 3038 new_local_type = MATSEQDENSE; 3039 bs = 1; 3040 } 3041 3042 /* create MATIS object if needed */ 3043 if (reuse == MAT_INITIAL_MATRIX) { 3044 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3045 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3046 } else { 3047 /* it also destroys the local matrices */ 3048 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3049 } 3050 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3051 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3052 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3053 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3054 3055 /* set values */ 3056 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3057 ptr_vals = recv_buffer_vals; 3058 ptr_idxs = recv_buffer_idxs; 3059 for (i=0;i<n_recvs;i++) { 3060 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3061 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3062 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3063 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3065 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3066 } else { 3067 /* TODO */ 3068 } 3069 ptr_idxs += olengths_idxs[i]; 3070 ptr_vals += olengths_vals[i]; 3071 } 3072 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3075 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3076 3077 #if 0 3078 if (!restrict_comm) { /* check */ 3079 Vec lvec,rvec; 3080 PetscReal infty_error; 3081 3082 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3083 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3084 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3085 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3086 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3087 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3088 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3089 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3090 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3091 } 3092 #endif 3093 3094 /* assemble new additional is (if any) */ 3095 if (nis) { 3096 PetscInt **temp_idxs,*count_is,j,psum; 3097 3098 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3099 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3100 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3101 ptr_idxs = recv_buffer_idxs_is; 3102 psum = 0; 3103 for (i=0;i<n_recvs;i++) { 3104 for (j=0;j<nis;j++) { 3105 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3106 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3107 psum += plen; 3108 ptr_idxs += plen+1; /* shift pointer to received data */ 3109 } 3110 } 3111 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3112 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3113 for (i=1;i<nis;i++) { 3114 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3115 } 3116 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3117 ptr_idxs = recv_buffer_idxs_is; 3118 for (i=0;i<n_recvs;i++) { 3119 for (j=0;j<nis;j++) { 3120 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3121 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3122 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3123 ptr_idxs += plen+1; /* shift pointer to received data */ 3124 } 3125 } 3126 for (i=0;i<nis;i++) { 3127 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3128 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3129 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3130 } 3131 ierr = PetscFree(count_is);CHKERRQ(ierr); 3132 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3133 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3134 } 3135 /* free workspace */ 3136 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3137 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3138 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3139 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3140 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3141 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3142 if (isdense) { 3143 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3144 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3145 } else { 3146 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3147 } 3148 if (nis) { 3149 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3150 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3151 } 3152 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3153 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3154 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3155 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3156 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3157 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3158 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3159 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3160 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3161 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3162 ierr = PetscFree(onodes);CHKERRQ(ierr); 3163 if (nis) { 3164 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3165 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3166 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3167 } 3168 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3169 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3170 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3171 for (i=0;i<nis;i++) { 3172 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3173 } 3174 } 3175 PetscFunctionReturn(0); 3176 } 3177 3178 /* temporary hack into ksp private data structure */ 3179 #include <petsc-private/kspimpl.h> 3180 3181 #undef __FUNCT__ 3182 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3183 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3184 { 3185 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3186 PC_IS *pcis = (PC_IS*)pc->data; 3187 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3188 MatNullSpace CoarseNullSpace=NULL; 3189 ISLocalToGlobalMapping coarse_islg; 3190 IS coarse_is,*isarray; 3191 PetscInt i,im_active=-1,active_procs=-1; 3192 PetscInt nis,nisdofs,nisneu; 3193 PC pc_temp; 3194 PCType coarse_pc_type; 3195 KSPType coarse_ksp_type; 3196 PetscBool multilevel_requested,multilevel_allowed; 3197 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3198 Mat t_coarse_mat_is; 3199 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3200 PetscMPIInt all_procs; 3201 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3202 PetscBool compute_vecs = PETSC_FALSE; 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 /* Assign global numbering to coarse dofs */ 3207 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3208 compute_vecs = PETSC_TRUE; 3209 PetscInt ocoarse_size; 3210 ocoarse_size = pcbddc->coarse_size; 3211 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3212 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3213 /* see if we can avoid some work */ 3214 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3215 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3216 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3217 coarse_reuse = PETSC_FALSE; 3218 } else { /* we can safely reuse already computed coarse matrix */ 3219 coarse_reuse = PETSC_TRUE; 3220 } 3221 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3222 coarse_reuse = PETSC_FALSE; 3223 } 3224 /* reset any subassembling information */ 3225 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3226 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3227 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3228 coarse_reuse = PETSC_TRUE; 3229 } 3230 3231 /* count "active" (i.e. with positive local size) and "void" processes */ 3232 im_active = !!(pcis->n); 3233 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3234 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3235 void_procs = all_procs-active_procs; 3236 csin_type_simple = PETSC_TRUE; 3237 if (pcbddc->current_level) { 3238 csin_ml = PETSC_TRUE; 3239 ncoarse_ml = void_procs; 3240 csin_ds = PETSC_TRUE; 3241 ncoarse_ds = void_procs; 3242 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3243 } else { 3244 csin_ml = PETSC_FALSE; 3245 ncoarse_ml = all_procs; 3246 if (void_procs) { 3247 csin_ds = PETSC_TRUE; 3248 ncoarse_ds = void_procs; 3249 csin_type_simple = PETSC_FALSE; 3250 } else { 3251 csin_ds = PETSC_FALSE; 3252 ncoarse_ds = all_procs; 3253 } 3254 } 3255 3256 /* 3257 test if we can go multilevel: three conditions must be satisfied: 3258 - we have not exceeded the number of levels requested 3259 - we can actually subassemble the active processes 3260 - we can find a suitable number of MPI processes where we can place the subassembled problem 3261 */ 3262 multilevel_allowed = PETSC_FALSE; 3263 multilevel_requested = PETSC_FALSE; 3264 if (pcbddc->current_level < pcbddc->max_levels) { 3265 multilevel_requested = PETSC_TRUE; 3266 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3267 multilevel_allowed = PETSC_FALSE; 3268 } else { 3269 multilevel_allowed = PETSC_TRUE; 3270 } 3271 } 3272 /* determine number of process partecipating to coarse solver */ 3273 if (multilevel_allowed) { 3274 ncoarse = ncoarse_ml; 3275 csin = csin_ml; 3276 } else { 3277 ncoarse = ncoarse_ds; 3278 csin = csin_ds; 3279 } 3280 3281 /* creates temporary l2gmap and IS for coarse indexes */ 3282 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3283 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3284 3285 /* creates temporary MATIS object for coarse matrix */ 3286 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3287 #if 0 3288 { 3289 PetscViewer viewer; 3290 char filename[256]; 3291 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3292 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3293 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3294 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3295 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3296 } 3297 #endif 3298 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3299 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3300 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3303 3304 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3305 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3306 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3307 const PetscInt *idxs; 3308 ISLocalToGlobalMapping tmap; 3309 3310 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3311 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3312 /* allocate space for temporary storage */ 3313 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3314 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3315 /* allocate for IS array */ 3316 nisdofs = pcbddc->n_ISForDofsLocal; 3317 nisneu = !!pcbddc->NeumannBoundariesLocal; 3318 nis = nisdofs + nisneu; 3319 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3320 /* dofs splitting */ 3321 for (i=0;i<nisdofs;i++) { 3322 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3323 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3324 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3325 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3326 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3327 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3329 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3330 } 3331 /* neumann boundaries */ 3332 if (pcbddc->NeumannBoundariesLocal) { 3333 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3334 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3335 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3336 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3337 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3338 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3339 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3340 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3341 } 3342 /* free memory */ 3343 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3344 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3345 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3346 } else { 3347 nis = 0; 3348 nisdofs = 0; 3349 nisneu = 0; 3350 isarray = NULL; 3351 } 3352 /* destroy no longer needed map */ 3353 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3354 3355 /* restrict on coarse candidates (if needed) */ 3356 coarse_mat_is = NULL; 3357 if (csin) { 3358 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3359 PetscInt j,tissize,*nisindices; 3360 PetscInt *coarse_candidates; 3361 const PetscInt* tisindices; 3362 /* get coarse candidates' ranks in pc communicator */ 3363 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3364 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3365 for (i=0,j=0;i<all_procs;i++) { 3366 if (!coarse_candidates[i]) { 3367 coarse_candidates[j]=i; 3368 j++; 3369 } 3370 } 3371 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3372 /* get a suitable subassembling pattern */ 3373 if (csin_type_simple) { 3374 PetscMPIInt rank; 3375 PetscInt issize,isidx; 3376 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3377 if (im_active) { 3378 issize = 1; 3379 isidx = (PetscInt)rank; 3380 } else { 3381 issize = 0; 3382 isidx = -1; 3383 } 3384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3385 } else { 3386 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3387 } 3388 if (pcbddc->dbg_flag) { 3389 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3390 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3391 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3393 for (i=0;i<j;i++) { 3394 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3395 } 3396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3397 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3398 } 3399 /* shift the pattern on coarse candidates */ 3400 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3401 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3402 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3403 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3404 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3405 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3406 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3407 } 3408 if (pcbddc->dbg_flag) { 3409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3410 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3411 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3413 } 3414 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3415 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3416 } else { 3417 if (pcbddc->dbg_flag) { 3418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3419 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3420 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3421 } 3422 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3423 coarse_mat_is = t_coarse_mat_is; 3424 } 3425 3426 /* create local to global scatters for coarse problem */ 3427 if (compute_vecs) { 3428 PetscInt lrows; 3429 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3430 if (coarse_mat_is) { 3431 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3432 } else { 3433 lrows = 0; 3434 } 3435 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3436 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3437 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3438 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3439 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3440 } 3441 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3442 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3443 3444 /* set defaults for coarse KSP and PC */ 3445 if (multilevel_allowed) { 3446 coarse_ksp_type = KSPRICHARDSON; 3447 coarse_pc_type = PCBDDC; 3448 } else { 3449 coarse_ksp_type = KSPPREONLY; 3450 coarse_pc_type = PCREDUNDANT; 3451 } 3452 3453 /* print some info if requested */ 3454 if (pcbddc->dbg_flag) { 3455 if (!multilevel_allowed) { 3456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3457 if (multilevel_requested) { 3458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3459 } else if (pcbddc->max_levels) { 3460 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3461 } 3462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3463 } 3464 } 3465 3466 /* create the coarse KSP object only once with defaults */ 3467 if (coarse_mat_is) { 3468 MatReuse coarse_mat_reuse; 3469 PetscViewer dbg_viewer; 3470 if (pcbddc->dbg_flag) { 3471 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3472 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3473 } 3474 if (!pcbddc->coarse_ksp) { 3475 char prefix[256],str_level[16]; 3476 size_t len; 3477 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3478 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3479 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3480 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3481 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3482 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3483 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3484 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3485 /* prefix */ 3486 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3487 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3488 if (!pcbddc->current_level) { 3489 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3490 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3491 } else { 3492 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3493 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3494 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3495 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3496 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3497 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3498 } 3499 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3500 /* allow user customization */ 3501 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3502 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3503 } 3504 3505 /* get some info after set from options */ 3506 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3507 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3508 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3509 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3510 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3511 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3512 isbddc = PETSC_FALSE; 3513 } 3514 if (isredundant) { 3515 KSP inner_ksp; 3516 PC inner_pc; 3517 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3518 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3519 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3520 } 3521 3522 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3523 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3524 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3525 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3526 if (nisdofs) { 3527 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3528 for (i=0;i<nisdofs;i++) { 3529 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3530 } 3531 } 3532 if (nisneu) { 3533 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3534 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3535 } 3536 3537 /* assemble coarse matrix */ 3538 if (coarse_reuse) { 3539 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3540 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3541 coarse_mat_reuse = MAT_REUSE_MATRIX; 3542 } else { 3543 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3544 } 3545 if (isbddc || isnn) { 3546 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3547 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3548 if (pcbddc->dbg_flag) { 3549 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3550 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3551 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3552 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3553 } 3554 } 3555 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3556 } else { 3557 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3558 } 3559 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3560 3561 /* propagate symmetry info to coarse matrix */ 3562 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3563 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3564 3565 /* set operators */ 3566 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3567 if (pcbddc->dbg_flag) { 3568 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3569 } 3570 } else { /* processes non partecipating to coarse solver (if any) */ 3571 coarse_mat = 0; 3572 } 3573 ierr = PetscFree(isarray);CHKERRQ(ierr); 3574 #if 0 3575 { 3576 PetscViewer viewer; 3577 char filename[256]; 3578 sprintf(filename,"coarse_mat.m"); 3579 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3580 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3581 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3582 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3583 } 3584 #endif 3585 3586 /* Compute coarse null space (special handling by BDDC only) */ 3587 if (pcbddc->NullSpace) { 3588 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3589 } 3590 3591 if (pcbddc->coarse_ksp) { 3592 Vec crhs,csol; 3593 PetscBool ispreonly; 3594 if (CoarseNullSpace) { 3595 if (isbddc) { 3596 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3597 } else { 3598 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3599 } 3600 } 3601 /* setup coarse ksp */ 3602 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3603 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3604 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3605 /* hack */ 3606 if (!csol) { 3607 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3608 } 3609 if (!crhs) { 3610 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3611 } 3612 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3613 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3614 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3615 KSP check_ksp; 3616 KSPType check_ksp_type; 3617 PC check_pc; 3618 Vec check_vec,coarse_vec; 3619 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3620 PetscInt its; 3621 PetscBool compute_eigs; 3622 PetscReal *eigs_r,*eigs_c; 3623 PetscInt neigs; 3624 const char *prefix; 3625 3626 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3627 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3628 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3629 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3630 if (ispreonly) { 3631 check_ksp_type = KSPPREONLY; 3632 compute_eigs = PETSC_FALSE; 3633 } else { 3634 check_ksp_type = KSPGMRES; 3635 compute_eigs = PETSC_TRUE; 3636 } 3637 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3638 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3639 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3640 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3641 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3642 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3643 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3644 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3645 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3646 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3647 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3648 /* create random vec */ 3649 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3650 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3651 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3652 if (CoarseNullSpace) { 3653 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3654 } 3655 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3656 /* solve coarse problem */ 3657 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3658 if (CoarseNullSpace) { 3659 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3660 } 3661 /* set eigenvalue estimation if preonly has not been requested */ 3662 if (compute_eigs) { 3663 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3664 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3665 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3666 lambda_max = eigs_r[neigs-1]; 3667 lambda_min = eigs_r[0]; 3668 if (pcbddc->use_coarse_estimates) { 3669 if (lambda_max>lambda_min) { 3670 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3671 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3672 } 3673 } 3674 } 3675 3676 /* check coarse problem residual error */ 3677 if (pcbddc->dbg_flag) { 3678 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3679 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3680 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3681 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3682 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3683 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3684 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3685 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3686 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3687 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3688 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3689 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3690 if (compute_eigs) { 3691 PetscReal lambda_max_s,lambda_min_s; 3692 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3693 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3694 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3695 for (i=0;i<neigs;i++) { 3696 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3697 } 3698 } 3699 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3700 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3701 } 3702 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3703 if (compute_eigs) { 3704 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3705 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3706 } 3707 } 3708 } 3709 /* print additional info */ 3710 if (pcbddc->dbg_flag) { 3711 /* waits until all processes reaches this point */ 3712 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3713 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3715 } 3716 3717 /* free memory */ 3718 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3719 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3720 PetscFunctionReturn(0); 3721 } 3722 3723 #undef __FUNCT__ 3724 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3725 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3726 { 3727 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3728 PC_IS* pcis = (PC_IS*)pc->data; 3729 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3730 PetscInt i,coarse_size; 3731 PetscInt *local_primal_indices; 3732 PetscErrorCode ierr; 3733 3734 PetscFunctionBegin; 3735 /* Compute global number of coarse dofs */ 3736 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3737 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3738 } 3739 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3740 3741 /* check numbering */ 3742 if (pcbddc->dbg_flag) { 3743 PetscScalar coarsesum,*array; 3744 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3745 3746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3748 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3749 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3750 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3751 for (i=0;i<pcbddc->local_primal_size;i++) { 3752 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3753 } 3754 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3755 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3756 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3757 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3758 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3759 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3760 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3761 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3762 for (i=0;i<pcis->n;i++) { 3763 if (array[i] == 1.0) { 3764 set_error = PETSC_TRUE; 3765 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3766 } 3767 } 3768 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3769 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3770 for (i=0;i<pcis->n;i++) { 3771 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3772 } 3773 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3774 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3775 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3776 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3777 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3778 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3779 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3780 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3781 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3783 for (i=0;i<pcbddc->local_primal_size;i++) { 3784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3785 } 3786 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3787 } 3788 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3789 if (set_error_reduced) { 3790 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3791 } 3792 } 3793 /* get back data */ 3794 *coarse_size_n = coarse_size; 3795 *local_primal_indices_n = local_primal_indices; 3796 PetscFunctionReturn(0); 3797 } 3798 3799 #undef __FUNCT__ 3800 #define __FUNCT__ "PCBDDCGlobalToLocal" 3801 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3802 { 3803 IS localis_t; 3804 PetscInt i,lsize,*idxs,n; 3805 PetscScalar *vals; 3806 PetscErrorCode ierr; 3807 3808 PetscFunctionBegin; 3809 /* get indices in local ordering exploiting local to global map */ 3810 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3811 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3812 for (i=0;i<lsize;i++) vals[i] = 1.0; 3813 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3814 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3815 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3816 if (idxs) { /* multilevel guard */ 3817 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3818 } 3819 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3820 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3821 ierr = PetscFree(vals);CHKERRQ(ierr); 3822 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3823 /* now compute set in local ordering */ 3824 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3825 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3826 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3827 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3828 for (i=0,lsize=0;i<n;i++) { 3829 if (PetscRealPart(vals[i]) > 0.5) { 3830 lsize++; 3831 } 3832 } 3833 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3834 for (i=0,lsize=0;i<n;i++) { 3835 if (PetscRealPart(vals[i]) > 0.5) { 3836 idxs[lsize++] = i; 3837 } 3838 } 3839 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3840 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3841 *localis = localis_t; 3842 PetscFunctionReturn(0); 3843 } 3844