1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1040 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1044 } 1045 1046 /* DIRICHLET PROBLEM */ 1047 /* Matrix for Dirichlet problem is pcis->A_II */ 1048 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1049 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1050 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1051 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1052 /* default */ 1053 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1054 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1055 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1056 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1057 if (issbaij) { 1058 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1059 } else { 1060 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1061 } 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1065 } 1066 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1067 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1068 if (!n_D) { 1069 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1070 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1071 } 1072 /* Set Up KSP for Dirichlet problem of BDDC */ 1073 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1074 /* set ksp_D into pcis data */ 1075 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1077 pcis->ksp_D = pcbddc->ksp_D; 1078 1079 /* NEUMANN PROBLEM */ 1080 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1081 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1082 if (pcbddc->ksp_R) { /* already created ksp */ 1083 PetscInt nn_R; 1084 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1085 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1086 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1087 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1088 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1092 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1093 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1094 reuse = MAT_INITIAL_MATRIX; 1095 } else { /* safe to reuse the matrix */ 1096 reuse = MAT_REUSE_MATRIX; 1097 } 1098 } 1099 /* last check */ 1100 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 } else { /* first time, so we need to create the matrix */ 1105 reuse = MAT_INITIAL_MATRIX; 1106 } 1107 /* extract A_RR */ 1108 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1109 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1110 if (ibs != mbs) { 1111 Mat newmat; 1112 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1113 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1115 } else { 1116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1117 } 1118 if (!pcbddc->ksp_R) { /* create object if not present */ 1119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1121 /* default */ 1122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1126 if (issbaij) { 1127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1128 } else { 1129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1130 } 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1137 if (!n_R) { 1138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1140 } 1141 /* Set Up KSP for Neumann problem of BDDC */ 1142 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1143 1144 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1145 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1146 /* Dirichlet */ 1147 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1148 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1152 /* need to be adapted? */ 1153 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1154 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1155 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1156 /* print info */ 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1171 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1175 /* need to be adapted? */ 1176 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1177 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1178 /* print info */ 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1185 } 1186 } 1187 /* free Neumann problem's matrix */ 1188 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1194 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1195 { 1196 PetscErrorCode ierr; 1197 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1198 1199 PetscFunctionBegin; 1200 if (applytranspose) { 1201 if (pcbddc->local_auxmat1) { 1202 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1203 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1204 } 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 } else { 1207 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1210 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1219 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1220 { 1221 PetscErrorCode ierr; 1222 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1223 PC_IS* pcis = (PC_IS*) (pc->data); 1224 const PetscScalar zero = 0.0; 1225 1226 PetscFunctionBegin; 1227 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1228 if (applytranspose) { 1229 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1231 } else { 1232 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1234 } 1235 /* start communications from local primal nodes to rhs of coarse solver */ 1236 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1241 /* TODO remove null space when doing multilevel */ 1242 if (pcbddc->coarse_ksp) { 1243 if (applytranspose) { 1244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } else { 1246 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } 1248 } 1249 /* start communications from coarse solver solution to local primal nodes */ 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Local solution on R nodes */ 1253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 } 1260 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1261 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 if (pcbddc->switch_static) { 1265 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 } 1268 1269 /* complete communications from coarse sol to local primal nodes */ 1270 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 PetscScalar *array,*array2; 1291 Vec from,to; 1292 1293 PetscFunctionBegin; 1294 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1295 from = pcbddc->coarse_vec; 1296 to = pcbddc->vec1_P; 1297 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1298 Vec tvec; 1299 PetscInt lsize; 1300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1302 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1303 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1304 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1307 } 1308 } else { /* from local to global -> put data in coarse right hand side */ 1309 from = pcbddc->vec1_P; 1310 to = pcbddc->coarse_vec; 1311 } 1312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1319 { 1320 PetscErrorCode ierr; 1321 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1322 PetscScalar *array,*array2; 1323 Vec from,to; 1324 1325 PetscFunctionBegin; 1326 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1327 from = pcbddc->coarse_vec; 1328 to = pcbddc->vec1_P; 1329 } else { /* from local to global -> put data in coarse right hand side */ 1330 from = pcbddc->vec1_P; 1331 to = pcbddc->coarse_vec; 1332 } 1333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1334 if (smode == SCATTER_FORWARD) { 1335 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1336 Vec tvec; 1337 PetscInt lsize; 1338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1339 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1341 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1343 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1344 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1345 } 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* uncomment for testing purposes */ 1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1355 { 1356 PetscErrorCode ierr; 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1360 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1361 MatType impMatType=MATSEQAIJ; 1362 /* one and zero */ 1363 PetscScalar one=1.0,zero=0.0; 1364 /* space to store constraints and their local indices */ 1365 PetscScalar *temp_quadrature_constraint; 1366 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1367 /* iterators */ 1368 PetscInt i,j,k,total_counts,temp_start_ptr; 1369 /* stuff to store connected components stored in pcbddc->mat_graph */ 1370 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1371 PetscInt n_ISForFaces,n_ISForEdges; 1372 /* near null space stuff */ 1373 MatNullSpace nearnullsp; 1374 const Vec *nearnullvecs; 1375 Vec *localnearnullsp; 1376 PetscBool nnsp_has_cnst; 1377 PetscInt nnsp_size; 1378 PetscScalar *array; 1379 /* BLAS integers */ 1380 PetscBLASInt lwork,lierr; 1381 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1382 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1383 /* LAPACK working arrays for SVD or POD */ 1384 PetscBool skip_lapack; 1385 PetscScalar *work; 1386 PetscReal *singular_vals; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscReal *rwork; 1389 #endif 1390 #if defined(PETSC_MISSING_LAPACK_GESVD) 1391 PetscBLASInt Blas_one_2=1; 1392 PetscScalar *temp_basis,*correlation_mat; 1393 #else 1394 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1395 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1396 #endif 1397 /* reuse */ 1398 PetscInt olocal_primal_size; 1399 PetscInt *oprimal_indices_local_idxs; 1400 /* change of basis */ 1401 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1402 PetscBool boolforchange,qr_needed; 1403 PetscBT touched,change_basis,qr_needed_idx; 1404 /* auxiliary stuff */ 1405 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1406 /* some quantities */ 1407 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1408 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1409 1410 1411 PetscFunctionBegin; 1412 /* Destroy Mat objects computed previously */ 1413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1415 /* Get index sets for faces, edges and vertices from graph */ 1416 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1417 /* free unneeded index sets */ 1418 if (!pcbddc->use_vertices) { 1419 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1420 } 1421 if (!pcbddc->use_edges) { 1422 for (i=0;i<n_ISForEdges;i++) { 1423 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1426 n_ISForEdges = 0; 1427 } 1428 if (!pcbddc->use_faces) { 1429 for (i=0;i<n_ISForFaces;i++) { 1430 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1433 n_ISForFaces = 0; 1434 } 1435 /* HACKS (the following two blocks of code) */ 1436 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1437 pcbddc->use_change_of_basis = PETSC_TRUE; 1438 if (!ISForEdges) { 1439 pcbddc->use_change_on_faces = PETSC_TRUE; 1440 } 1441 } 1442 if (pcbddc->NullSpace) { 1443 /* use_change_of_basis should be consistent among processors */ 1444 PetscBool tbool[2],gbool[2]; 1445 tbool [0] = pcbddc->use_change_of_basis; 1446 tbool [1] = pcbddc->use_change_on_faces; 1447 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1448 pcbddc->use_change_of_basis = gbool[0]; 1449 pcbddc->use_change_on_faces = gbool[1]; 1450 } 1451 /* print some info */ 1452 if (pcbddc->dbg_flag) { 1453 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1455 i = 0; 1456 if (ISForVertices) { 1457 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1458 } 1459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 } 1464 /* check if near null space is attached to global mat */ 1465 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1466 if (nearnullsp) { 1467 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1468 /* remove any stored info */ 1469 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1470 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1471 /* store information for BDDC solver reuse */ 1472 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1473 pcbddc->onearnullspace = nearnullsp; 1474 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1475 for (i=0;i<nnsp_size;i++) { 1476 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1477 } 1478 } else { /* if near null space is not provided BDDC uses constants by default */ 1479 nnsp_size = 0; 1480 nnsp_has_cnst = PETSC_TRUE; 1481 } 1482 /* get max number of constraints on a single cc */ 1483 max_constraints = nnsp_size; 1484 if (nnsp_has_cnst) max_constraints++; 1485 1486 /* 1487 Evaluate maximum storage size needed by the procedure 1488 - temp_indices will contain start index of each constraint stored as follows 1489 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1490 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1491 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1492 */ 1493 total_counts = n_ISForFaces+n_ISForEdges; 1494 total_counts *= max_constraints; 1495 n_vertices = 0; 1496 if (ISForVertices) { 1497 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1498 } 1499 total_counts += n_vertices; 1500 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1501 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1502 total_counts = 0; 1503 max_size_of_constraint = 0; 1504 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1505 if (i<n_ISForEdges) { 1506 used_IS = &ISForEdges[i]; 1507 } else { 1508 used_IS = &ISForFaces[i-n_ISForEdges]; 1509 } 1510 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1511 total_counts += j; 1512 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1513 } 1514 total_counts *= max_constraints; 1515 total_counts += n_vertices; 1516 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1518 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1519 /* get local part of global near null space vectors */ 1520 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1521 for (k=0;k<nnsp_size;k++) { 1522 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1523 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 } 1526 1527 /* whether or not to skip lapack calls */ 1528 skip_lapack = PETSC_TRUE; 1529 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1530 1531 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1532 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1533 PetscScalar temp_work; 1534 #if defined(PETSC_MISSING_LAPACK_GESVD) 1535 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1536 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1537 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1538 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1539 #if defined(PETSC_USE_COMPLEX) 1540 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1541 #endif 1542 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1543 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1544 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1545 lwork = -1; 1546 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1547 #if !defined(PETSC_USE_COMPLEX) 1548 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1549 #else 1550 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1551 #endif 1552 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1553 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1554 #else /* on missing GESVD */ 1555 /* SVD */ 1556 PetscInt max_n,min_n; 1557 max_n = max_size_of_constraint; 1558 min_n = max_constraints; 1559 if (max_size_of_constraint < max_constraints) { 1560 min_n = max_size_of_constraint; 1561 max_n = max_constraints; 1562 } 1563 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1564 #if defined(PETSC_USE_COMPLEX) 1565 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1566 #endif 1567 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1568 lwork = -1; 1569 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1570 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1571 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1573 #if !defined(PETSC_USE_COMPLEX) 1574 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1575 #else 1576 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1577 #endif 1578 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1579 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1580 #endif /* on missing GESVD */ 1581 /* Allocate optimal workspace */ 1582 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1583 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1584 } 1585 /* Now we can loop on constraining sets */ 1586 total_counts = 0; 1587 temp_indices[0] = 0; 1588 /* vertices */ 1589 if (ISForVertices) { 1590 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1591 if (nnsp_has_cnst) { /* consider all vertices */ 1592 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1593 for (i=0;i<n_vertices;i++) { 1594 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1595 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1596 total_counts++; 1597 } 1598 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1599 PetscBool used_vertex; 1600 for (i=0;i<n_vertices;i++) { 1601 used_vertex = PETSC_FALSE; 1602 k = 0; 1603 while (!used_vertex && k<nnsp_size) { 1604 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1605 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1606 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1607 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1608 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1609 total_counts++; 1610 used_vertex = PETSC_TRUE; 1611 } 1612 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1613 k++; 1614 } 1615 } 1616 } 1617 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1618 n_vertices = total_counts; 1619 } 1620 1621 /* edges and faces */ 1622 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1623 if (i<n_ISForEdges) { 1624 used_IS = &ISForEdges[i]; 1625 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1626 } else { 1627 used_IS = &ISForFaces[i-n_ISForEdges]; 1628 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1629 } 1630 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1631 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1632 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1633 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1634 /* change of basis should not be performed on local periodic nodes */ 1635 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1636 if (nnsp_has_cnst) { 1637 PetscScalar quad_value; 1638 temp_constraints++; 1639 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1640 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1641 for (j=0;j<size_of_constraint;j++) { 1642 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1643 } 1644 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1645 total_counts++; 1646 } 1647 for (k=0;k<nnsp_size;k++) { 1648 PetscReal real_value; 1649 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1650 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1651 for (j=0;j<size_of_constraint;j++) { 1652 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1653 } 1654 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1655 /* check if array is null on the connected component */ 1656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1657 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1658 if (real_value > 0.0) { /* keep indices and values */ 1659 temp_constraints++; 1660 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1661 total_counts++; 1662 } 1663 } 1664 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1665 valid_constraints = temp_constraints; 1666 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1667 if (!pcbddc->use_nnsp_true && temp_constraints) { 1668 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1669 1670 #if defined(PETSC_MISSING_LAPACK_GESVD) 1671 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1672 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1673 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1674 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1675 from that computed using LAPACKgesvd 1676 -> This is due to a different computation of eigenvectors in LAPACKheev 1677 -> The quality of the POD-computed basis will be the same */ 1678 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1679 /* Store upper triangular part of correlation matrix */ 1680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1681 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1682 for (j=0;j<temp_constraints;j++) { 1683 for (k=0;k<j+1;k++) { 1684 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1685 } 1686 } 1687 /* compute eigenvalues and eigenvectors of correlation matrix */ 1688 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1689 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1690 #if !defined(PETSC_USE_COMPLEX) 1691 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1692 #else 1693 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1694 #endif 1695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1697 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1698 j=0; 1699 while (j < temp_constraints && singular_vals[j] < tol) j++; 1700 total_counts=total_counts-j; 1701 valid_constraints = temp_constraints-j; 1702 /* scale and copy POD basis into used quadrature memory */ 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1709 if (j<temp_constraints) { 1710 PetscInt ii; 1711 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1713 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1715 for (k=0;k<temp_constraints-j;k++) { 1716 for (ii=0;ii<size_of_constraint;ii++) { 1717 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1718 } 1719 } 1720 } 1721 #else /* on missing GESVD */ 1722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1726 #if !defined(PETSC_USE_COMPLEX) 1727 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1728 #else 1729 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1730 #endif 1731 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1733 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1734 k = temp_constraints; 1735 if (k > size_of_constraint) k = size_of_constraint; 1736 j = 0; 1737 while (j < k && singular_vals[k-j-1] < tol) j++; 1738 total_counts = total_counts-temp_constraints+k-j; 1739 valid_constraints = k-j; 1740 #endif /* on missing GESVD */ 1741 } 1742 /* setting change_of_basis flag is safe now */ 1743 if (boolforchange) { 1744 for (j=0;j<valid_constraints;j++) { 1745 PetscBTSet(change_basis,total_counts-j-1); 1746 } 1747 } 1748 } 1749 /* free index sets of faces, edges and vertices */ 1750 for (i=0;i<n_ISForFaces;i++) { 1751 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1752 } 1753 if (n_ISForFaces) { 1754 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1755 } 1756 for (i=0;i<n_ISForEdges;i++) { 1757 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1758 } 1759 if (n_ISForEdges) { 1760 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1761 } 1762 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1763 /* map temp_indices_to_constraint in boundary numbering */ 1764 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1765 if (i != temp_indices[total_counts]) { 1766 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1767 } 1768 1769 /* free workspace */ 1770 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1771 ierr = PetscFree(work);CHKERRQ(ierr); 1772 #if defined(PETSC_USE_COMPLEX) 1773 ierr = PetscFree(rwork);CHKERRQ(ierr); 1774 #endif 1775 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1776 #if defined(PETSC_MISSING_LAPACK_GESVD) 1777 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1778 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1779 #endif 1780 } 1781 for (k=0;k<nnsp_size;k++) { 1782 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1783 } 1784 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1785 1786 /* set quantities in pcbddc data structure and store previous primal size */ 1787 /* n_vertices defines the number of subdomain corners in the primal space */ 1788 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1789 olocal_primal_size = pcbddc->local_primal_size; 1790 pcbddc->local_primal_size = total_counts; 1791 pcbddc->n_vertices = n_vertices; 1792 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1793 1794 /* Create constraint matrix */ 1795 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1796 /* so we need to set it up properly either with or without change of basis */ 1797 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1798 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1799 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1800 /* array to compute a local numbering of constraints : vertices first then constraints */ 1801 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1802 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1803 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1804 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1805 /* auxiliary stuff for basis change */ 1806 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1807 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1808 1809 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1810 total_primal_vertices=0; 1811 for (i=0;i<pcbddc->local_primal_size;i++) { 1812 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1813 if (size_of_constraint == 1) { 1814 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1815 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1816 aux_primal_minloc[total_primal_vertices]=0; 1817 total_primal_vertices++; 1818 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1819 PetscInt min_loc,min_index; 1820 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1821 /* find first untouched local node */ 1822 k = 0; 1823 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1824 min_index = global_indices[k]; 1825 min_loc = k; 1826 /* search the minimum among global nodes already untouched on the cc */ 1827 for (k=1;k<size_of_constraint;k++) { 1828 /* there can be more than one constraint on a single connected component */ 1829 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1830 min_index = global_indices[k]; 1831 min_loc = k; 1832 } 1833 } 1834 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1835 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1836 aux_primal_minloc[total_primal_vertices]=min_loc; 1837 total_primal_vertices++; 1838 } 1839 } 1840 /* determine if a QR strategy is needed for change of basis */ 1841 qr_needed = PETSC_FALSE; 1842 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1843 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1844 if (PetscBTLookup(change_basis,i)) { 1845 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1846 j = 0; 1847 for (k=0;k<size_of_constraint;k++) { 1848 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1849 j++; 1850 } 1851 } 1852 /* found more than one primal dof on the cc */ 1853 if (j > 1) { 1854 PetscBTSet(qr_needed_idx,i); 1855 qr_needed = PETSC_TRUE; 1856 } 1857 } 1858 } 1859 /* free workspace */ 1860 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1861 1862 /* permute indices in order to have a sorted set of vertices */ 1863 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1864 1865 /* nonzero structure of constraint matrix */ 1866 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1867 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1868 j=total_primal_vertices; 1869 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1870 if (!PetscBTLookup(change_basis,i)) { 1871 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1872 j++; 1873 } 1874 } 1875 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1876 ierr = PetscFree(nnz);CHKERRQ(ierr); 1877 /* set values in constraint matrix */ 1878 for (i=0;i<total_primal_vertices;i++) { 1879 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1880 } 1881 total_counts = total_primal_vertices; 1882 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1883 if (!PetscBTLookup(change_basis,i)) { 1884 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1885 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1886 total_counts++; 1887 } 1888 } 1889 /* assembling */ 1890 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1891 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1892 /* 1893 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1894 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1895 */ 1896 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1897 if (pcbddc->use_change_of_basis) { 1898 /* dual and primal dofs on a single cc */ 1899 PetscInt dual_dofs,primal_dofs; 1900 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1901 PetscInt primal_counter; 1902 /* working stuff for GEQRF */ 1903 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1904 PetscBLASInt lqr_work; 1905 /* working stuff for UNGQR */ 1906 PetscScalar *gqr_work,lgqr_work_t; 1907 PetscBLASInt lgqr_work; 1908 /* working stuff for TRTRS */ 1909 PetscScalar *trs_rhs; 1910 PetscBLASInt Blas_NRHS; 1911 /* pointers for values insertion into change of basis matrix */ 1912 PetscInt *start_rows,*start_cols; 1913 PetscScalar *start_vals; 1914 /* working stuff for values insertion */ 1915 PetscBT is_primal; 1916 1917 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1918 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1919 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1920 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1921 /* work arrays */ 1922 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1923 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1924 /* nonzeros per row */ 1925 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1926 if (PetscBTLookup(change_basis,i)) { 1927 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1928 if (PetscBTLookup(qr_needed_idx,i)) { 1929 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1930 } else { 1931 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1932 /* get local primal index on the cc */ 1933 j = 0; 1934 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1935 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1936 } 1937 } 1938 } 1939 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1940 ierr = PetscFree(nnz);CHKERRQ(ierr); 1941 /* Set initial identity in the matrix */ 1942 for (i=0;i<pcis->n_B;i++) { 1943 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1944 } 1945 1946 if (pcbddc->dbg_flag) { 1947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1949 } 1950 1951 1952 /* Now we loop on the constraints which need a change of basis */ 1953 /* 1954 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1955 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1956 1957 Basic blocks of change of basis matrix T computed 1958 1959 - Using the following block transformation if there is only a primal dof on the cc 1960 (in the example, primal dof is the last one of the edge in LOCAL ordering 1961 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1962 | 1 0 ... 0 1 | 1963 | 0 1 ... 0 1 | 1964 | ... | 1965 | 0 ... 1 1 | 1966 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1967 1968 - via QR decomposition of constraints otherwise 1969 */ 1970 if (qr_needed) { 1971 /* space to store Q */ 1972 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1973 /* first we issue queries for optimal work */ 1974 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1976 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1977 lqr_work = -1; 1978 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1980 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1981 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1982 lgqr_work = -1; 1983 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1985 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1986 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1987 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1988 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1989 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1990 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1991 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1992 /* array to store scaling factors for reflectors */ 1993 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1994 /* array to store rhs and solution of triangular solver */ 1995 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1996 /* allocating workspace for check */ 1997 if (pcbddc->dbg_flag) { 1998 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1999 } 2000 } 2001 /* array to store whether a node is primal or not */ 2002 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2003 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2004 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2005 if (i != total_primal_vertices) { 2006 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2007 } 2008 for (i=0;i<total_primal_vertices;i++) { 2009 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2010 } 2011 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2012 2013 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2014 /* -> using implicit ordering contained in temp_indices data */ 2015 total_counts = pcbddc->n_vertices; 2016 primal_counter = total_counts; 2017 while (total_counts<pcbddc->local_primal_size) { 2018 primal_dofs = 1; 2019 if (PetscBTLookup(change_basis,total_counts)) { 2020 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2021 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2022 primal_dofs++; 2023 } 2024 /* get constraint info */ 2025 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2026 dual_dofs = size_of_constraint-primal_dofs; 2027 2028 if (pcbddc->dbg_flag) { 2029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2030 } 2031 2032 if (primal_dofs > 1) { /* QR */ 2033 2034 /* copy quadrature constraints for change of basis check */ 2035 if (pcbddc->dbg_flag) { 2036 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2037 } 2038 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2039 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2040 2041 /* compute QR decomposition of constraints */ 2042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2045 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2046 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2047 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2049 2050 /* explictly compute R^-T */ 2051 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2052 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2057 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2058 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2059 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2061 2062 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2065 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2067 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2068 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2069 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2070 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2071 2072 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2073 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2074 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2081 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2082 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2083 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2084 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2085 2086 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2087 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2088 /* insert cols for primal dofs */ 2089 for (j=0;j<primal_dofs;j++) { 2090 start_vals = &qr_basis[j*size_of_constraint]; 2091 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2092 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2093 } 2094 /* insert cols for dual dofs */ 2095 for (j=0,k=0;j<dual_dofs;k++) { 2096 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2097 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2098 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2099 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2100 j++; 2101 } 2102 } 2103 2104 /* check change of basis */ 2105 if (pcbddc->dbg_flag) { 2106 PetscInt ii,jj; 2107 PetscBool valid_qr=PETSC_TRUE; 2108 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2115 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2116 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2117 for (jj=0;jj<size_of_constraint;jj++) { 2118 for (ii=0;ii<primal_dofs;ii++) { 2119 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2120 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2121 } 2122 } 2123 if (!valid_qr) { 2124 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2125 for (jj=0;jj<size_of_constraint;jj++) { 2126 for (ii=0;ii<primal_dofs;ii++) { 2127 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2128 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2129 } 2130 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2132 } 2133 } 2134 } 2135 } else { 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2137 } 2138 } 2139 } else { /* simple transformation block */ 2140 PetscInt row,col; 2141 PetscScalar val; 2142 for (j=0;j<size_of_constraint;j++) { 2143 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2144 if (!PetscBTLookup(is_primal,row)) { 2145 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2146 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2147 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2148 } else { 2149 for (k=0;k<size_of_constraint;k++) { 2150 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2151 if (row != col) { 2152 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2153 } else { 2154 val = 1.0; 2155 } 2156 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2157 } 2158 } 2159 } 2160 if (pcbddc->dbg_flag) { 2161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2162 } 2163 } 2164 /* increment primal counter */ 2165 primal_counter += primal_dofs; 2166 } else { 2167 if (pcbddc->dbg_flag) { 2168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2169 } 2170 } 2171 /* increment constraint counter total_counts */ 2172 total_counts += primal_dofs; 2173 } 2174 2175 /* free workspace */ 2176 if (qr_needed) { 2177 if (pcbddc->dbg_flag) { 2178 ierr = PetscFree(work);CHKERRQ(ierr); 2179 } 2180 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2181 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2182 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2183 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2184 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2185 } 2186 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2187 /* assembling */ 2188 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2189 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2190 /* 2191 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2192 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2193 */ 2194 } 2195 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2196 if (pcbddc->user_ChangeOfBasisMatrix) { 2197 PetscInt rows,cols; 2198 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2199 if (rows == pcis->n && cols == pcis->n) { 2200 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2201 } else { 2202 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2203 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2204 } 2205 } 2206 2207 /* get indices in local ordering for vertices and constraints */ 2208 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2209 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2210 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2211 } 2212 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2213 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2214 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2215 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2216 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2217 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2218 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2219 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2220 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2221 /* set quantities in PCBDDC data struct */ 2222 pcbddc->n_actual_vertices = i; 2223 /* check if a new primal space has been introduced */ 2224 pcbddc->new_primal_space_local = PETSC_TRUE; 2225 if (olocal_primal_size == pcbddc->local_primal_size) { 2226 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2227 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2228 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2229 } 2230 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2231 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2232 2233 /* flush dbg viewer */ 2234 if (pcbddc->dbg_flag) { 2235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2236 } 2237 2238 /* free workspace */ 2239 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2240 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2241 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2242 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2243 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2244 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2245 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2246 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2252 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2253 { 2254 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2255 PC_IS *pcis = (PC_IS*)pc->data; 2256 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2257 PetscInt ierr,i,vertex_size; 2258 PetscViewer viewer=pcbddc->dbg_viewer; 2259 2260 PetscFunctionBegin; 2261 /* Reset previously computed graph */ 2262 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2263 /* Init local Graph struct */ 2264 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2265 2266 /* Check validity of the csr graph passed in by the user */ 2267 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2268 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2269 } 2270 2271 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2272 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2273 Mat mat_adj; 2274 const PetscInt *xadj,*adjncy; 2275 PetscBool flg_row=PETSC_TRUE; 2276 2277 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2278 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2279 if (!flg_row) { 2280 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2281 } 2282 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2283 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2284 if (!flg_row) { 2285 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2286 } 2287 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2288 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2289 } 2290 2291 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2292 vertex_size = 1; 2293 if (pcbddc->user_provided_isfordofs) { 2294 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2295 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2296 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2297 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2298 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2299 } 2300 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2301 pcbddc->n_ISForDofs = 0; 2302 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2303 } 2304 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2305 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2306 } else { 2307 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2308 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2309 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2310 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2311 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2312 } 2313 } 2314 } 2315 2316 /* Setup of Graph */ 2317 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2318 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2319 } 2320 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2321 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2322 } 2323 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2324 2325 /* Graph's connected components analysis */ 2326 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2327 2328 /* print some info to stdout */ 2329 if (pcbddc->dbg_flag) { 2330 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2331 } 2332 2333 /* mark topography has done */ 2334 pcbddc->recompute_topography = PETSC_FALSE; 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #undef __FUNCT__ 2339 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2340 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2341 { 2342 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2343 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2344 PetscErrorCode ierr; 2345 2346 PetscFunctionBegin; 2347 n = 0; 2348 vertices = 0; 2349 if (pcbddc->ConstraintMatrix) { 2350 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2351 for (i=0;i<local_primal_size;i++) { 2352 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2353 if (size_of_constraint == 1) n++; 2354 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2355 } 2356 if (vertices_idx) { 2357 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2358 n = 0; 2359 for (i=0;i<local_primal_size;i++) { 2360 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2361 if (size_of_constraint == 1) { 2362 vertices[n++]=row_cmat_indices[0]; 2363 } 2364 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2365 } 2366 } 2367 } 2368 *n_vertices = n; 2369 if (vertices_idx) *vertices_idx = vertices; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2375 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2376 { 2377 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2378 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2379 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2380 PetscBT touched; 2381 PetscErrorCode ierr; 2382 2383 /* This function assumes that the number of local constraints per connected component 2384 is not greater than the number of nodes defined for the connected component 2385 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2386 PetscFunctionBegin; 2387 n = 0; 2388 constraints_index = 0; 2389 if (pcbddc->ConstraintMatrix) { 2390 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2391 max_size_of_constraint = 0; 2392 for (i=0;i<local_primal_size;i++) { 2393 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2394 if (size_of_constraint > 1) { 2395 n++; 2396 } 2397 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2398 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2399 } 2400 if (constraints_idx) { 2401 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2402 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2403 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2404 n = 0; 2405 for (i=0;i<local_primal_size;i++) { 2406 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2407 if (size_of_constraint > 1) { 2408 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2409 /* find first untouched local node */ 2410 j = 0; 2411 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2412 min_index = row_cmat_global_indices[j]; 2413 min_loc = j; 2414 /* search the minimum among nodes not yet touched on the connected component 2415 since there can be more than one constraint on a single cc */ 2416 for (j=1;j<size_of_constraint;j++) { 2417 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2418 min_index = row_cmat_global_indices[j]; 2419 min_loc = j; 2420 } 2421 } 2422 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2423 constraints_index[n++] = row_cmat_indices[min_loc]; 2424 } 2425 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2426 } 2427 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2428 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2429 } 2430 } 2431 *n_constraints = n; 2432 if (constraints_idx) *constraints_idx = constraints_index; 2433 PetscFunctionReturn(0); 2434 } 2435 2436 /* the next two functions has been adapted from pcis.c */ 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "PCBDDCApplySchur" 2439 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2440 { 2441 PetscErrorCode ierr; 2442 PC_IS *pcis = (PC_IS*)(pc->data); 2443 2444 PetscFunctionBegin; 2445 if (!vec2_B) { vec2_B = v; } 2446 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2447 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2448 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2449 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2450 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2456 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2457 { 2458 PetscErrorCode ierr; 2459 PC_IS *pcis = (PC_IS*)(pc->data); 2460 2461 PetscFunctionBegin; 2462 if (!vec2_B) { vec2_B = v; } 2463 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2464 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2465 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2466 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2467 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "PCBDDCSubsetNumbering" 2473 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2474 { 2475 Vec local_vec,global_vec; 2476 IS seqis,paris; 2477 VecScatter scatter_ctx; 2478 PetscScalar *array; 2479 PetscInt *temp_global_dofs; 2480 PetscScalar globalsum; 2481 PetscInt i,j,s; 2482 PetscInt nlocals,first_index,old_index,max_local; 2483 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2484 PetscMPIInt *dof_sizes,*dof_displs; 2485 PetscBool first_found; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 /* mpi buffers */ 2490 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2491 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2492 j = ( !rank_prec_comm ? size_prec_comm : 0); 2493 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2495 /* get maximum size of subset */ 2496 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2497 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2498 max_local = 0; 2499 for (i=0;i<n_local_dofs;i++) { 2500 if (max_local < temp_global_dofs[i] ) { 2501 max_local = temp_global_dofs[i]; 2502 } 2503 } 2504 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2505 max_global++; 2506 max_local = 0; 2507 for (i=0;i<n_local_dofs;i++) { 2508 if (max_local < local_dofs[i] ) { 2509 max_local = local_dofs[i]; 2510 } 2511 } 2512 max_local++; 2513 /* allocate workspace */ 2514 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2515 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2516 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2517 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2518 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2519 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2520 /* create scatter */ 2521 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2522 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2523 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2524 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2525 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2526 /* init array */ 2527 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2528 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2529 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2530 if (local_dofs_mult) { 2531 for (i=0;i<n_local_dofs;i++) { 2532 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2533 } 2534 } else { 2535 for (i=0;i<n_local_dofs;i++) { 2536 array[local_dofs[i]]=1.0; 2537 } 2538 } 2539 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2540 /* scatter into global vec and get total number of global dofs */ 2541 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2542 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2543 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2544 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2545 /* Fill global_vec with cumulative function for global numbering */ 2546 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2547 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2548 nlocals = 0; 2549 first_index = -1; 2550 first_found = PETSC_FALSE; 2551 for (i=0;i<s;i++) { 2552 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2553 first_found = PETSC_TRUE; 2554 first_index = i; 2555 } 2556 nlocals += (PetscInt)PetscRealPart(array[i]); 2557 } 2558 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2559 if (!rank_prec_comm) { 2560 dof_displs[0]=0; 2561 for (i=1;i<size_prec_comm;i++) { 2562 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2563 } 2564 } 2565 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2566 if (first_found) { 2567 array[first_index] += (PetscScalar)nlocals; 2568 old_index = first_index; 2569 for (i=first_index+1;i<s;i++) { 2570 if (PetscRealPart(array[i]) > 0.1) { 2571 array[i] += array[old_index]; 2572 old_index = i; 2573 } 2574 } 2575 } 2576 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2577 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2578 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2579 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2580 /* get global ordering of local dofs */ 2581 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2582 if (local_dofs_mult) { 2583 for (i=0;i<n_local_dofs;i++) { 2584 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2585 } 2586 } else { 2587 for (i=0;i<n_local_dofs;i++) { 2588 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2589 } 2590 } 2591 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2592 /* free workspace */ 2593 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2594 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2595 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2596 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2597 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2598 /* return pointer to global ordering of local dofs */ 2599 *global_numbering_subset = temp_global_dofs; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2605 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2606 { 2607 PetscInt i,j; 2608 PetscScalar *alphas; 2609 PetscErrorCode ierr; 2610 2611 PetscFunctionBegin; 2612 /* this implements stabilized Gram-Schmidt */ 2613 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2614 for (i=0;i<n;i++) { 2615 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2616 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2617 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2618 } 2619 ierr = PetscFree(alphas);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "MatISGetSubassemblingPattern" 2625 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2626 { 2627 Mat subdomain_adj; 2628 IS new_ranks,ranks_send_to; 2629 MatPartitioning partitioner; 2630 Mat_IS *matis; 2631 PetscInt n_neighs,*neighs,*n_shared,**shared; 2632 PetscInt prank; 2633 PetscMPIInt size,rank,color; 2634 PetscInt *xadj,*adjncy,*oldranks; 2635 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2636 PetscInt i,j,local_size,threshold=0; 2637 PetscErrorCode ierr; 2638 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2639 PetscSubcomm subcomm; 2640 2641 PetscFunctionBegin; 2642 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2643 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2644 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2645 2646 /* Get info on mapping */ 2647 matis = (Mat_IS*)(mat->data); 2648 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2649 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2650 2651 /* build local CSR graph of subdomains' connectivity */ 2652 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2653 xadj[0] = 0; 2654 xadj[1] = PetscMax(n_neighs-1,0); 2655 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2656 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2657 2658 if (threshold) { 2659 PetscInt* count,min_threshold; 2660 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2661 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2662 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2663 for (j=0;j<n_shared[i];j++) { 2664 count[shared[i][j]] += 1; 2665 } 2666 } 2667 /* adapt threshold since we dont want to lose significant connections */ 2668 min_threshold = n_neighs; 2669 for (i=1;i<n_neighs;i++) { 2670 for (j=0;j<n_shared[i];j++) { 2671 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2672 } 2673 } 2674 threshold = PetscMax(min_threshold+1,threshold); 2675 xadj[1] = 0; 2676 for (i=1;i<n_neighs;i++) { 2677 for (j=0;j<n_shared[i];j++) { 2678 if (count[shared[i][j]] < threshold) { 2679 adjncy[xadj[1]] = neighs[i]; 2680 adjncy_wgt[xadj[1]] = n_shared[i]; 2681 xadj[1]++; 2682 break; 2683 } 2684 } 2685 } 2686 ierr = PetscFree(count);CHKERRQ(ierr); 2687 } else { 2688 if (xadj[1]) { 2689 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2690 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2691 } 2692 } 2693 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2694 if (use_square) { 2695 for (i=0;i<xadj[1];i++) { 2696 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2697 } 2698 } 2699 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2700 2701 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2702 2703 /* 2704 Restrict work on active processes only. 2705 */ 2706 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2707 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2708 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2709 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2710 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2711 if (color) { 2712 ierr = PetscFree(xadj);CHKERRQ(ierr); 2713 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2714 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2715 } else { 2716 PetscInt coarsening_ratio; 2717 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2719 prank = rank; 2720 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2721 /* 2722 for (i=0;i<size;i++) { 2723 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2724 } 2725 */ 2726 for (i=0;i<xadj[1];i++) { 2727 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2728 } 2729 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2730 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2731 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2732 2733 /* Partition */ 2734 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2735 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2736 if (use_vwgt) { 2737 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2738 v_wgt[0] = local_size; 2739 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2740 } 2741 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2742 coarsening_ratio = size/n_subdomains; 2743 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2744 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2745 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2746 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2747 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2748 2749 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2750 if (contiguous) { 2751 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2752 } else { 2753 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2754 } 2755 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2756 /* clean up */ 2757 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2758 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2759 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2760 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2761 } 2762 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2763 2764 /* assemble parallel IS for sends */ 2765 i = 1; 2766 if (color) i=0; 2767 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2768 2769 /* get back IS */ 2770 *is_sends = ranks_send_to; 2771 PetscFunctionReturn(0); 2772 } 2773 2774 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatISSubassemble" 2778 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2779 { 2780 Mat local_mat; 2781 Mat_IS *matis; 2782 IS is_sends_internal; 2783 PetscInt rows,cols; 2784 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2785 PetscBool ismatis,isdense,destroy_mat; 2786 ISLocalToGlobalMapping l2gmap; 2787 PetscInt* l2gmap_indices; 2788 const PetscInt* is_indices; 2789 MatType new_local_type; 2790 /* buffers */ 2791 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2792 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2793 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2794 /* MPI */ 2795 MPI_Comm comm,comm_n; 2796 PetscSubcomm subcomm; 2797 PetscMPIInt n_sends,n_recvs,commsize; 2798 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2799 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2800 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2801 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2802 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2803 PetscErrorCode ierr; 2804 2805 PetscFunctionBegin; 2806 /* TODO: add missing checks */ 2807 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2808 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2809 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2810 PetscValidLogicalCollectiveInt(mat,nis,7); 2811 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2812 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2813 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2814 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2815 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2816 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2817 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2818 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2819 PetscInt mrows,mcols,mnrows,mncols; 2820 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2821 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2822 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2823 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2824 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2825 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2826 } 2827 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2828 PetscValidLogicalCollectiveInt(mat,bs,0); 2829 /* prepare IS for sending if not provided */ 2830 if (!is_sends) { 2831 PetscBool pcontig = PETSC_TRUE; 2832 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2833 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2834 } else { 2835 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2836 is_sends_internal = is_sends; 2837 } 2838 2839 /* get pointer of MATIS data */ 2840 matis = (Mat_IS*)mat->data; 2841 2842 /* get comm */ 2843 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2844 2845 /* compute number of sends */ 2846 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2847 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2848 2849 /* compute number of receives */ 2850 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2851 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2852 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2853 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2854 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2855 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2856 ierr = PetscFree(iflags);CHKERRQ(ierr); 2857 2858 /* restrict comm if requested */ 2859 subcomm = 0; 2860 destroy_mat = PETSC_FALSE; 2861 if (restrict_comm) { 2862 PetscMPIInt color,rank,subcommsize; 2863 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2864 color = 0; 2865 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2866 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2867 subcommsize = commsize - subcommsize; 2868 /* check if reuse has been requested */ 2869 if (reuse == MAT_REUSE_MATRIX) { 2870 if (*mat_n) { 2871 PetscMPIInt subcommsize2; 2872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2873 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2874 comm_n = PetscObjectComm((PetscObject)*mat_n); 2875 } else { 2876 comm_n = PETSC_COMM_SELF; 2877 } 2878 } else { /* MAT_INITIAL_MATRIX */ 2879 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 2880 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 2881 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2882 comm_n = subcomm->comm; 2883 } 2884 /* flag to destroy *mat_n if not significative */ 2885 if (color) destroy_mat = PETSC_TRUE; 2886 } else { 2887 comm_n = comm; 2888 } 2889 2890 /* prepare send/receive buffers */ 2891 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2892 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2893 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2894 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2895 if (nis) { 2896 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 2897 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 2898 } 2899 2900 /* Get data from local matrices */ 2901 if (!isdense) { 2902 /* TODO: See below some guidelines on how to prepare the local buffers */ 2903 /* 2904 send_buffer_vals should contain the raw values of the local matrix 2905 send_buffer_idxs should contain: 2906 - MatType_PRIVATE type 2907 - PetscInt size_of_l2gmap 2908 - PetscInt global_row_indices[size_of_l2gmap] 2909 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2910 */ 2911 } else { 2912 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2913 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2914 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2915 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2916 send_buffer_idxs[1] = i; 2917 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2918 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2919 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2920 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2921 for (i=0;i<n_sends;i++) { 2922 ilengths_vals[is_indices[i]] = len*len; 2923 ilengths_idxs[is_indices[i]] = len+2; 2924 } 2925 } 2926 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2927 /* additional is (if any) */ 2928 if (nis) { 2929 PetscMPIInt psum; 2930 PetscInt j; 2931 for (j=0,psum=0;j<nis;j++) { 2932 PetscInt plen; 2933 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2934 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 2935 psum += len+1; /* indices + lenght */ 2936 } 2937 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 2938 for (j=0,psum=0;j<nis;j++) { 2939 PetscInt plen; 2940 const PetscInt *is_array_idxs; 2941 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2942 send_buffer_idxs_is[psum] = plen; 2943 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2944 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 2945 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2946 psum += plen+1; /* indices + lenght */ 2947 } 2948 for (i=0;i<n_sends;i++) { 2949 ilengths_idxs_is[is_indices[i]] = psum; 2950 } 2951 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 2952 } 2953 2954 buf_size_idxs = 0; 2955 buf_size_vals = 0; 2956 buf_size_idxs_is = 0; 2957 for (i=0;i<n_recvs;i++) { 2958 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2959 buf_size_vals += (PetscInt)olengths_vals[i]; 2960 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 2961 } 2962 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2963 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2964 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 2965 2966 /* get new tags for clean communications */ 2967 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2968 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2969 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 2970 2971 /* allocate for requests */ 2972 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2973 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2974 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 2975 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2976 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2977 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 2978 2979 /* communications */ 2980 ptr_idxs = recv_buffer_idxs; 2981 ptr_vals = recv_buffer_vals; 2982 ptr_idxs_is = recv_buffer_idxs_is; 2983 for (i=0;i<n_recvs;i++) { 2984 source_dest = onodes[i]; 2985 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2986 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2987 ptr_idxs += olengths_idxs[i]; 2988 ptr_vals += olengths_vals[i]; 2989 if (nis) { 2990 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 2991 ptr_idxs_is += olengths_idxs_is[i]; 2992 } 2993 } 2994 for (i=0;i<n_sends;i++) { 2995 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2996 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2997 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2998 if (nis) { 2999 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3000 } 3001 } 3002 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3003 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3004 3005 /* assemble new l2g map */ 3006 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3007 ptr_idxs = recv_buffer_idxs; 3008 buf_size_idxs = 0; 3009 for (i=0;i<n_recvs;i++) { 3010 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3011 ptr_idxs += olengths_idxs[i]; 3012 } 3013 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3014 ptr_idxs = recv_buffer_idxs; 3015 buf_size_idxs = 0; 3016 for (i=0;i<n_recvs;i++) { 3017 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3018 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3019 ptr_idxs += olengths_idxs[i]; 3020 } 3021 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3022 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3023 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3024 3025 /* infer new local matrix type from received local matrices type */ 3026 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3027 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3028 if (n_recvs) { 3029 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3030 ptr_idxs = recv_buffer_idxs; 3031 for (i=0;i<n_recvs;i++) { 3032 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3033 new_local_type_private = MATAIJ_PRIVATE; 3034 break; 3035 } 3036 ptr_idxs += olengths_idxs[i]; 3037 } 3038 switch (new_local_type_private) { 3039 case MATDENSE_PRIVATE: 3040 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3041 new_local_type = MATSEQAIJ; 3042 bs = 1; 3043 } else { /* if I receive only 1 dense matrix */ 3044 new_local_type = MATSEQDENSE; 3045 bs = 1; 3046 } 3047 break; 3048 case MATAIJ_PRIVATE: 3049 new_local_type = MATSEQAIJ; 3050 bs = 1; 3051 break; 3052 case MATBAIJ_PRIVATE: 3053 new_local_type = MATSEQBAIJ; 3054 break; 3055 case MATSBAIJ_PRIVATE: 3056 new_local_type = MATSEQSBAIJ; 3057 break; 3058 default: 3059 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3060 break; 3061 } 3062 } else { /* by default, new_local_type is seqdense */ 3063 new_local_type = MATSEQDENSE; 3064 bs = 1; 3065 } 3066 3067 /* create MATIS object if needed */ 3068 if (reuse == MAT_INITIAL_MATRIX) { 3069 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3070 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3071 } else { 3072 /* it also destroys the local matrices */ 3073 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3074 } 3075 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3076 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3077 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3078 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3079 3080 /* set values */ 3081 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3082 ptr_vals = recv_buffer_vals; 3083 ptr_idxs = recv_buffer_idxs; 3084 for (i=0;i<n_recvs;i++) { 3085 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3086 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3087 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3088 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3089 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3090 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3091 } else { 3092 /* TODO */ 3093 } 3094 ptr_idxs += olengths_idxs[i]; 3095 ptr_vals += olengths_vals[i]; 3096 } 3097 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3098 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3100 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 3102 #if 0 3103 if (!restrict_comm) { /* check */ 3104 Vec lvec,rvec; 3105 PetscReal infty_error; 3106 3107 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3108 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3109 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3110 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3111 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3112 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3113 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3114 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3115 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3116 } 3117 #endif 3118 3119 /* assemble new additional is (if any) */ 3120 if (nis) { 3121 PetscInt **temp_idxs,*count_is,j,psum; 3122 3123 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3124 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3125 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3126 ptr_idxs = recv_buffer_idxs_is; 3127 psum = 0; 3128 for (i=0;i<n_recvs;i++) { 3129 for (j=0;j<nis;j++) { 3130 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3131 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3132 psum += plen; 3133 ptr_idxs += plen+1; /* shift pointer to received data */ 3134 } 3135 } 3136 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3137 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3138 for (i=1;i<nis;i++) { 3139 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3140 } 3141 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3142 ptr_idxs = recv_buffer_idxs_is; 3143 for (i=0;i<n_recvs;i++) { 3144 for (j=0;j<nis;j++) { 3145 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3146 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3147 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3148 ptr_idxs += plen+1; /* shift pointer to received data */ 3149 } 3150 } 3151 for (i=0;i<nis;i++) { 3152 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3153 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3154 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3155 } 3156 ierr = PetscFree(count_is);CHKERRQ(ierr); 3157 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3158 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3159 } 3160 /* free workspace */ 3161 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3162 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3163 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3164 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3165 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3166 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3167 if (isdense) { 3168 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3169 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3170 } else { 3171 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3172 } 3173 if (nis) { 3174 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3175 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3176 } 3177 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3178 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3179 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3180 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3181 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3182 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3183 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3184 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3185 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3186 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3187 ierr = PetscFree(onodes);CHKERRQ(ierr); 3188 if (nis) { 3189 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3190 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3191 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3192 } 3193 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3194 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3195 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3196 for (i=0;i<nis;i++) { 3197 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3198 } 3199 } 3200 PetscFunctionReturn(0); 3201 } 3202 3203 /* temporary hack into ksp private data structure */ 3204 #include <petsc-private/kspimpl.h> 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3208 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3209 { 3210 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3211 PC_IS *pcis = (PC_IS*)pc->data; 3212 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3213 MatNullSpace CoarseNullSpace=NULL; 3214 ISLocalToGlobalMapping coarse_islg; 3215 IS coarse_is,*isarray; 3216 PetscInt i,im_active=-1,active_procs=-1; 3217 PetscInt nis,nisdofs,nisneu; 3218 PC pc_temp; 3219 PCType coarse_pc_type; 3220 KSPType coarse_ksp_type; 3221 PetscBool multilevel_requested,multilevel_allowed; 3222 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3223 Mat t_coarse_mat_is; 3224 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3225 PetscMPIInt all_procs; 3226 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3227 PetscBool compute_vecs = PETSC_FALSE; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 /* Assign global numbering to coarse dofs */ 3232 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3233 compute_vecs = PETSC_TRUE; 3234 PetscInt ocoarse_size; 3235 ocoarse_size = pcbddc->coarse_size; 3236 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3237 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3238 /* see if we can avoid some work */ 3239 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3240 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3241 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3242 coarse_reuse = PETSC_FALSE; 3243 } else { /* we can safely reuse already computed coarse matrix */ 3244 coarse_reuse = PETSC_TRUE; 3245 } 3246 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3247 coarse_reuse = PETSC_FALSE; 3248 } 3249 /* reset any subassembling information */ 3250 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3251 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3252 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3253 coarse_reuse = PETSC_TRUE; 3254 } 3255 3256 /* count "active" (i.e. with positive local size) and "void" processes */ 3257 im_active = !!(pcis->n); 3258 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3259 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3260 void_procs = all_procs-active_procs; 3261 csin_type_simple = PETSC_TRUE; 3262 if (pcbddc->current_level) { 3263 csin_ml = PETSC_TRUE; 3264 ncoarse_ml = void_procs; 3265 csin_ds = PETSC_TRUE; 3266 ncoarse_ds = void_procs; 3267 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3268 } else { 3269 csin_ml = PETSC_FALSE; 3270 ncoarse_ml = all_procs; 3271 if (void_procs) { 3272 csin_ds = PETSC_TRUE; 3273 ncoarse_ds = void_procs; 3274 csin_type_simple = PETSC_FALSE; 3275 } else { 3276 csin_ds = PETSC_FALSE; 3277 ncoarse_ds = all_procs; 3278 } 3279 } 3280 3281 /* 3282 test if we can go multilevel: three conditions must be satisfied: 3283 - we have not exceeded the number of levels requested 3284 - we can actually subassemble the active processes 3285 - we can find a suitable number of MPI processes where we can place the subassembled problem 3286 */ 3287 multilevel_allowed = PETSC_FALSE; 3288 multilevel_requested = PETSC_FALSE; 3289 if (pcbddc->current_level < pcbddc->max_levels) { 3290 multilevel_requested = PETSC_TRUE; 3291 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3292 multilevel_allowed = PETSC_FALSE; 3293 } else { 3294 multilevel_allowed = PETSC_TRUE; 3295 } 3296 } 3297 /* determine number of process partecipating to coarse solver */ 3298 if (multilevel_allowed) { 3299 ncoarse = ncoarse_ml; 3300 csin = csin_ml; 3301 } else { 3302 ncoarse = ncoarse_ds; 3303 csin = csin_ds; 3304 } 3305 3306 /* creates temporary l2gmap and IS for coarse indexes */ 3307 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3308 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3309 3310 /* creates temporary MATIS object for coarse matrix */ 3311 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3312 #if 0 3313 { 3314 PetscViewer viewer; 3315 char filename[256]; 3316 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3317 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3318 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3319 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3320 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3321 } 3322 #endif 3323 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3324 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3325 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3328 3329 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3330 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3331 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3332 const PetscInt *idxs; 3333 ISLocalToGlobalMapping tmap; 3334 3335 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3336 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3337 /* allocate space for temporary storage */ 3338 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3339 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3340 /* allocate for IS array */ 3341 nisdofs = pcbddc->n_ISForDofsLocal; 3342 nisneu = !!pcbddc->NeumannBoundariesLocal; 3343 nis = nisdofs + nisneu; 3344 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3345 /* dofs splitting */ 3346 for (i=0;i<nisdofs;i++) { 3347 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3348 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3349 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3350 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3351 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3352 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3353 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3354 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3355 } 3356 /* neumann boundaries */ 3357 if (pcbddc->NeumannBoundariesLocal) { 3358 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3359 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3360 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3361 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3362 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3363 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3364 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3365 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3366 } 3367 /* free memory */ 3368 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3369 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3370 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3371 } else { 3372 nis = 0; 3373 nisdofs = 0; 3374 nisneu = 0; 3375 isarray = NULL; 3376 } 3377 /* destroy no longer needed map */ 3378 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3379 3380 /* restrict on coarse candidates (if needed) */ 3381 coarse_mat_is = NULL; 3382 if (csin) { 3383 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3384 PetscInt j,tissize,*nisindices; 3385 PetscInt *coarse_candidates; 3386 const PetscInt* tisindices; 3387 /* get coarse candidates' ranks in pc communicator */ 3388 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3389 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3390 for (i=0,j=0;i<all_procs;i++) { 3391 if (!coarse_candidates[i]) { 3392 coarse_candidates[j]=i; 3393 j++; 3394 } 3395 } 3396 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3397 /* get a suitable subassembling pattern */ 3398 if (csin_type_simple) { 3399 PetscMPIInt rank; 3400 PetscInt issize,isidx; 3401 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3402 if (im_active) { 3403 issize = 1; 3404 isidx = (PetscInt)rank; 3405 } else { 3406 issize = 0; 3407 isidx = -1; 3408 } 3409 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3410 } else { 3411 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3412 } 3413 if (pcbddc->dbg_flag) { 3414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3415 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3416 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3417 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3418 for (i=0;i<j;i++) { 3419 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3420 } 3421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3423 } 3424 /* shift the pattern on coarse candidates */ 3425 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3426 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3427 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3428 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3429 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3430 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3431 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3432 } 3433 if (pcbddc->dbg_flag) { 3434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3436 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3437 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3438 } 3439 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3440 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3441 } else { 3442 if (pcbddc->dbg_flag) { 3443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3445 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3446 } 3447 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3448 coarse_mat_is = t_coarse_mat_is; 3449 } 3450 3451 /* create local to global scatters for coarse problem */ 3452 if (compute_vecs) { 3453 PetscInt lrows; 3454 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3455 if (coarse_mat_is) { 3456 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3457 } else { 3458 lrows = 0; 3459 } 3460 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3461 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3462 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3463 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3464 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3465 } 3466 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3467 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3468 3469 /* set defaults for coarse KSP and PC */ 3470 if (multilevel_allowed) { 3471 coarse_ksp_type = KSPRICHARDSON; 3472 coarse_pc_type = PCBDDC; 3473 } else { 3474 coarse_ksp_type = KSPPREONLY; 3475 coarse_pc_type = PCREDUNDANT; 3476 } 3477 3478 /* print some info if requested */ 3479 if (pcbddc->dbg_flag) { 3480 if (!multilevel_allowed) { 3481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3482 if (multilevel_requested) { 3483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3484 } else if (pcbddc->max_levels) { 3485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3486 } 3487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3488 } 3489 } 3490 3491 /* create the coarse KSP object only once with defaults */ 3492 if (coarse_mat_is) { 3493 MatReuse coarse_mat_reuse; 3494 PetscViewer dbg_viewer = NULL; 3495 if (pcbddc->dbg_flag) { 3496 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3497 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3498 } 3499 if (!pcbddc->coarse_ksp) { 3500 char prefix[256],str_level[16]; 3501 size_t len; 3502 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3503 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3504 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3505 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3506 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3507 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3508 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3509 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3510 /* prefix */ 3511 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3512 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3513 if (!pcbddc->current_level) { 3514 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3515 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3516 } else { 3517 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3518 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3519 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3520 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3521 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3522 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3523 } 3524 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3525 /* allow user customization */ 3526 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3527 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3528 } 3529 3530 /* get some info after set from options */ 3531 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3532 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3533 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3534 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3535 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3536 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3537 isbddc = PETSC_FALSE; 3538 } 3539 if (isredundant) { 3540 KSP inner_ksp; 3541 PC inner_pc; 3542 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3543 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3544 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3545 } 3546 3547 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3548 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3549 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3550 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3551 if (nisdofs) { 3552 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3553 for (i=0;i<nisdofs;i++) { 3554 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3555 } 3556 } 3557 if (nisneu) { 3558 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3559 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3560 } 3561 3562 /* assemble coarse matrix */ 3563 if (coarse_reuse) { 3564 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3565 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3566 coarse_mat_reuse = MAT_REUSE_MATRIX; 3567 } else { 3568 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3569 } 3570 if (isbddc || isnn) { 3571 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3572 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3573 if (pcbddc->dbg_flag) { 3574 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3575 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3576 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3577 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3578 } 3579 } 3580 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3581 } else { 3582 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3583 } 3584 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3585 3586 /* propagate symmetry info to coarse matrix */ 3587 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3588 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3589 3590 /* set operators */ 3591 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3592 if (pcbddc->dbg_flag) { 3593 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3594 } 3595 } else { /* processes non partecipating to coarse solver (if any) */ 3596 coarse_mat = 0; 3597 } 3598 ierr = PetscFree(isarray);CHKERRQ(ierr); 3599 #if 0 3600 { 3601 PetscViewer viewer; 3602 char filename[256]; 3603 sprintf(filename,"coarse_mat.m"); 3604 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3605 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3606 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3607 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3608 } 3609 #endif 3610 3611 /* Compute coarse null space (special handling by BDDC only) */ 3612 if (pcbddc->NullSpace) { 3613 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3614 } 3615 3616 if (pcbddc->coarse_ksp) { 3617 Vec crhs,csol; 3618 PetscBool ispreonly; 3619 if (CoarseNullSpace) { 3620 if (isbddc) { 3621 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3622 } else { 3623 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3624 } 3625 } 3626 /* setup coarse ksp */ 3627 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3628 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3629 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3630 /* hack */ 3631 if (!csol) { 3632 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3633 } 3634 if (!crhs) { 3635 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3636 } 3637 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3638 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3639 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3640 KSP check_ksp; 3641 KSPType check_ksp_type; 3642 PC check_pc; 3643 Vec check_vec,coarse_vec; 3644 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3645 PetscInt its; 3646 PetscBool compute_eigs; 3647 PetscReal *eigs_r,*eigs_c; 3648 PetscInt neigs; 3649 const char *prefix; 3650 3651 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3652 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3653 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3654 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3655 if (ispreonly) { 3656 check_ksp_type = KSPPREONLY; 3657 compute_eigs = PETSC_FALSE; 3658 } else { 3659 check_ksp_type = KSPGMRES; 3660 compute_eigs = PETSC_TRUE; 3661 } 3662 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3663 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3664 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3665 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3666 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3667 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3668 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3669 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3670 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3671 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3672 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3673 /* create random vec */ 3674 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3675 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3676 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3677 if (CoarseNullSpace) { 3678 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3679 } 3680 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3681 /* solve coarse problem */ 3682 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3683 if (CoarseNullSpace) { 3684 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3685 } 3686 /* set eigenvalue estimation if preonly has not been requested */ 3687 if (compute_eigs) { 3688 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3689 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3690 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3691 lambda_max = eigs_r[neigs-1]; 3692 lambda_min = eigs_r[0]; 3693 if (pcbddc->use_coarse_estimates) { 3694 if (lambda_max>lambda_min) { 3695 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3696 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3697 } 3698 } 3699 } 3700 3701 /* check coarse problem residual error */ 3702 if (pcbddc->dbg_flag) { 3703 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3704 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3705 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3706 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3707 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3708 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3709 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3710 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3711 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3712 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3713 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3714 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3715 if (compute_eigs) { 3716 PetscReal lambda_max_s,lambda_min_s; 3717 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3718 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3719 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3720 for (i=0;i<neigs;i++) { 3721 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3722 } 3723 } 3724 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3725 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3726 } 3727 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3728 if (compute_eigs) { 3729 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3730 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3731 } 3732 } 3733 } 3734 /* print additional info */ 3735 if (pcbddc->dbg_flag) { 3736 /* waits until all processes reaches this point */ 3737 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3738 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3739 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3740 } 3741 3742 /* free memory */ 3743 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3744 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #undef __FUNCT__ 3749 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3750 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3751 { 3752 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3753 PC_IS* pcis = (PC_IS*)pc->data; 3754 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3755 PetscInt i,coarse_size; 3756 PetscInt *local_primal_indices; 3757 PetscErrorCode ierr; 3758 3759 PetscFunctionBegin; 3760 /* Compute global number of coarse dofs */ 3761 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3762 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3763 } 3764 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3765 3766 /* check numbering */ 3767 if (pcbddc->dbg_flag) { 3768 PetscScalar coarsesum,*array; 3769 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3770 3771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3774 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3775 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3776 for (i=0;i<pcbddc->local_primal_size;i++) { 3777 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3778 } 3779 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3780 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3781 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3782 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3783 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3784 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3785 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3786 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3787 for (i=0;i<pcis->n;i++) { 3788 if (array[i] == 1.0) { 3789 set_error = PETSC_TRUE; 3790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3791 } 3792 } 3793 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3794 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3795 for (i=0;i<pcis->n;i++) { 3796 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3797 } 3798 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3799 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3800 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3801 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3802 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3803 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3804 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3805 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3806 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3808 for (i=0;i<pcbddc->local_primal_size;i++) { 3809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3810 } 3811 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3812 } 3813 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3814 if (set_error_reduced) { 3815 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3816 } 3817 } 3818 /* get back data */ 3819 *coarse_size_n = coarse_size; 3820 *local_primal_indices_n = local_primal_indices; 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #undef __FUNCT__ 3825 #define __FUNCT__ "PCBDDCGlobalToLocal" 3826 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3827 { 3828 IS localis_t; 3829 PetscInt i,lsize,*idxs,n; 3830 PetscScalar *vals; 3831 PetscErrorCode ierr; 3832 3833 PetscFunctionBegin; 3834 /* get indices in local ordering exploiting local to global map */ 3835 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3836 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3837 for (i=0;i<lsize;i++) vals[i] = 1.0; 3838 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3839 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3840 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3841 if (idxs) { /* multilevel guard */ 3842 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3843 } 3844 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3845 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3846 ierr = PetscFree(vals);CHKERRQ(ierr); 3847 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3848 /* now compute set in local ordering */ 3849 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3850 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3851 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3852 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3853 for (i=0,lsize=0;i<n;i++) { 3854 if (PetscRealPart(vals[i]) > 0.5) { 3855 lsize++; 3856 } 3857 } 3858 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3859 for (i=0,lsize=0;i<n;i++) { 3860 if (PetscRealPart(vals[i]) > 0.5) { 3861 idxs[lsize++] = i; 3862 } 3863 } 3864 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3865 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3866 *localis = localis_t; 3867 PetscFunctionReturn(0); 3868 } 3869