1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1040 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1044 } 1045 1046 /* DIRICHLET PROBLEM */ 1047 /* Matrix for Dirichlet problem is pcis->A_II */ 1048 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1049 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1050 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1051 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1052 /* default */ 1053 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1054 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1055 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1056 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1057 if (issbaij) { 1058 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1059 } else { 1060 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1061 } 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1065 } 1066 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1067 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1068 if (!n_D) { 1069 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1070 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1071 } 1072 /* Set Up KSP for Dirichlet problem of BDDC */ 1073 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1074 /* set ksp_D into pcis data */ 1075 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1077 pcis->ksp_D = pcbddc->ksp_D; 1078 1079 /* NEUMANN PROBLEM */ 1080 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1081 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1082 if (pcbddc->ksp_R) { /* already created ksp */ 1083 PetscInt nn_R; 1084 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1085 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1086 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1087 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1088 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1092 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1093 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1094 reuse = MAT_INITIAL_MATRIX; 1095 } else { /* safe to reuse the matrix */ 1096 reuse = MAT_REUSE_MATRIX; 1097 } 1098 } 1099 /* last check */ 1100 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 } else { /* first time, so we need to create the matrix */ 1105 reuse = MAT_INITIAL_MATRIX; 1106 } 1107 /* extract A_RR */ 1108 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1109 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1110 if (ibs != mbs) { 1111 Mat newmat; 1112 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1113 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1115 } else { 1116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1117 } 1118 if (!pcbddc->ksp_R) { /* create object if not present */ 1119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1121 /* default */ 1122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1126 if (issbaij) { 1127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1128 } else { 1129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1130 } 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1137 if (!n_R) { 1138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1140 } 1141 /* Set Up KSP for Neumann problem of BDDC */ 1142 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1143 1144 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1145 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1146 /* Dirichlet */ 1147 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1148 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1152 /* need to be adapted? */ 1153 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1154 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1155 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1156 /* print info */ 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1171 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1175 /* need to be adapted? */ 1176 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1177 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1178 /* print info */ 1179 if (pcbddc->dbg_flag) { 1180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 } 1183 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1185 } 1186 } 1187 /* free Neumann problem's matrix */ 1188 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1194 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1195 { 1196 PetscErrorCode ierr; 1197 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1198 1199 PetscFunctionBegin; 1200 if (applytranspose) { 1201 if (pcbddc->local_auxmat1) { 1202 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1203 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1204 } 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 } else { 1207 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1210 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1211 } 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1219 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1220 { 1221 PetscErrorCode ierr; 1222 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1223 PC_IS* pcis = (PC_IS*) (pc->data); 1224 const PetscScalar zero = 0.0; 1225 1226 PetscFunctionBegin; 1227 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1228 if (applytranspose) { 1229 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1231 } else { 1232 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1234 } 1235 /* start communications from local primal nodes to rhs of coarse solver */ 1236 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1237 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 1240 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1241 /* TODO remove null space when doing multilevel */ 1242 if (pcbddc->coarse_ksp) { 1243 if (applytranspose) { 1244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } else { 1246 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } 1248 } 1249 /* start communications from coarse solver solution to local primal nodes */ 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Local solution on R nodes */ 1253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 } 1260 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1261 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 if (pcbddc->switch_static) { 1265 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 } 1268 1269 /* complete communications from coarse sol to local primal nodes */ 1270 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 PetscScalar *array,*array2; 1291 Vec from,to; 1292 1293 PetscFunctionBegin; 1294 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1295 from = pcbddc->coarse_vec; 1296 to = pcbddc->vec1_P; 1297 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1298 Vec tvec; 1299 PetscInt lsize; 1300 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1301 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1302 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1303 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1304 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1307 } 1308 } else { /* from local to global -> put data in coarse right hand side */ 1309 from = pcbddc->vec1_P; 1310 to = pcbddc->coarse_vec; 1311 } 1312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1318 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1319 { 1320 PetscErrorCode ierr; 1321 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1322 PetscScalar *array,*array2; 1323 Vec from,to; 1324 1325 PetscFunctionBegin; 1326 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1327 from = pcbddc->coarse_vec; 1328 to = pcbddc->vec1_P; 1329 } else { /* from local to global -> put data in coarse right hand side */ 1330 from = pcbddc->vec1_P; 1331 to = pcbddc->coarse_vec; 1332 } 1333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1334 if (smode == SCATTER_FORWARD) { 1335 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1336 Vec tvec; 1337 PetscInt lsize; 1338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1339 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1341 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1342 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1343 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1344 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1345 } 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* uncomment for testing purposes */ 1351 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1354 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1355 { 1356 PetscErrorCode ierr; 1357 PC_IS* pcis = (PC_IS*)(pc->data); 1358 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1359 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1360 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1361 MatType impMatType=MATSEQAIJ; 1362 /* one and zero */ 1363 PetscScalar one=1.0,zero=0.0; 1364 /* space to store constraints and their local indices */ 1365 PetscScalar *temp_quadrature_constraint; 1366 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1367 /* iterators */ 1368 PetscInt i,j,k,total_counts,temp_start_ptr; 1369 /* stuff to store connected components stored in pcbddc->mat_graph */ 1370 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1371 PetscInt n_ISForFaces,n_ISForEdges; 1372 /* near null space stuff */ 1373 MatNullSpace nearnullsp; 1374 const Vec *nearnullvecs; 1375 Vec *localnearnullsp; 1376 PetscBool nnsp_has_cnst; 1377 PetscInt nnsp_size; 1378 PetscScalar *array; 1379 /* BLAS integers */ 1380 PetscBLASInt lwork,lierr; 1381 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1382 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1383 /* LAPACK working arrays for SVD or POD */ 1384 PetscBool skip_lapack; 1385 PetscScalar *work; 1386 PetscReal *singular_vals; 1387 #if defined(PETSC_USE_COMPLEX) 1388 PetscReal *rwork; 1389 #endif 1390 #if defined(PETSC_MISSING_LAPACK_GESVD) 1391 PetscBLASInt Blas_one_2=1; 1392 PetscScalar *temp_basis,*correlation_mat; 1393 #else 1394 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1395 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1396 #endif 1397 /* reuse */ 1398 PetscInt olocal_primal_size; 1399 PetscInt *oprimal_indices_local_idxs; 1400 /* change of basis */ 1401 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1402 PetscBool boolforchange,qr_needed; 1403 PetscBT touched,change_basis,qr_needed_idx; 1404 /* auxiliary stuff */ 1405 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1406 /* some quantities */ 1407 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1408 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1409 1410 1411 PetscFunctionBegin; 1412 /* Destroy Mat objects computed previously */ 1413 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1414 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1415 /* Get index sets for faces, edges and vertices from graph */ 1416 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1417 /* free unneeded index sets */ 1418 if (!pcbddc->use_vertices) { 1419 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1420 } 1421 if (!pcbddc->use_edges) { 1422 for (i=0;i<n_ISForEdges;i++) { 1423 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1426 n_ISForEdges = 0; 1427 } 1428 if (!pcbddc->use_faces) { 1429 for (i=0;i<n_ISForFaces;i++) { 1430 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1433 n_ISForFaces = 0; 1434 } 1435 /* HACKS (the following two blocks of code) */ 1436 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1437 pcbddc->use_change_of_basis = PETSC_TRUE; 1438 if (!ISForEdges) { 1439 pcbddc->use_change_on_faces = PETSC_TRUE; 1440 } 1441 } 1442 if (pcbddc->NullSpace) { 1443 /* use_change_of_basis should be consistent among processors */ 1444 PetscBool tbool[2],gbool[2]; 1445 tbool [0] = pcbddc->use_change_of_basis; 1446 tbool [1] = pcbddc->use_change_on_faces; 1447 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1448 pcbddc->use_change_of_basis = gbool[0]; 1449 pcbddc->use_change_on_faces = gbool[1]; 1450 } 1451 /* print some info */ 1452 if (pcbddc->dbg_flag) { 1453 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1455 i = 0; 1456 if (ISForVertices) { 1457 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1458 } 1459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 } 1464 /* check if near null space is attached to global mat */ 1465 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1466 if (nearnullsp) { 1467 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1468 /* remove any stored info */ 1469 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1470 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1471 /* store information for BDDC solver reuse */ 1472 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1473 pcbddc->onearnullspace = nearnullsp; 1474 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1475 for (i=0;i<nnsp_size;i++) { 1476 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1477 } 1478 } else { /* if near null space is not provided BDDC uses constants by default */ 1479 nnsp_size = 0; 1480 nnsp_has_cnst = PETSC_TRUE; 1481 } 1482 /* get max number of constraints on a single cc */ 1483 max_constraints = nnsp_size; 1484 if (nnsp_has_cnst) max_constraints++; 1485 1486 /* 1487 Evaluate maximum storage size needed by the procedure 1488 - temp_indices will contain start index of each constraint stored as follows 1489 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1490 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1491 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1492 */ 1493 total_counts = n_ISForFaces+n_ISForEdges; 1494 total_counts *= max_constraints; 1495 n_vertices = 0; 1496 if (ISForVertices) { 1497 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1498 } 1499 total_counts += n_vertices; 1500 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1501 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1502 total_counts = 0; 1503 max_size_of_constraint = 0; 1504 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1505 if (i<n_ISForEdges) { 1506 used_IS = &ISForEdges[i]; 1507 } else { 1508 used_IS = &ISForFaces[i-n_ISForEdges]; 1509 } 1510 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1511 total_counts += j; 1512 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1513 } 1514 total_counts *= max_constraints; 1515 total_counts += n_vertices; 1516 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1518 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1519 /* get local part of global near null space vectors */ 1520 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1521 for (k=0;k<nnsp_size;k++) { 1522 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1523 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 } 1526 1527 /* whether or not to skip lapack calls */ 1528 skip_lapack = PETSC_TRUE; 1529 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1530 1531 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1532 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1533 PetscScalar temp_work; 1534 #if defined(PETSC_MISSING_LAPACK_GESVD) 1535 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1536 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1537 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1538 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1539 #if defined(PETSC_USE_COMPLEX) 1540 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1541 #endif 1542 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1543 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1544 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1545 lwork = -1; 1546 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1547 #if !defined(PETSC_USE_COMPLEX) 1548 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1549 #else 1550 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1551 #endif 1552 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1553 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1554 #else /* on missing GESVD */ 1555 /* SVD */ 1556 PetscInt max_n,min_n; 1557 max_n = max_size_of_constraint; 1558 min_n = max_constraints; 1559 if (max_size_of_constraint < max_constraints) { 1560 min_n = max_size_of_constraint; 1561 max_n = max_constraints; 1562 } 1563 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1564 #if defined(PETSC_USE_COMPLEX) 1565 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1566 #endif 1567 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1568 lwork = -1; 1569 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1570 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1571 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1573 #if !defined(PETSC_USE_COMPLEX) 1574 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1575 #else 1576 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1577 #endif 1578 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1579 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1580 #endif /* on missing GESVD */ 1581 /* Allocate optimal workspace */ 1582 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1583 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1584 } 1585 /* Now we can loop on constraining sets */ 1586 total_counts = 0; 1587 temp_indices[0] = 0; 1588 /* vertices */ 1589 if (ISForVertices) { 1590 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1591 if (nnsp_has_cnst) { /* consider all vertices */ 1592 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1593 for (i=0;i<n_vertices;i++) { 1594 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1595 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1596 total_counts++; 1597 } 1598 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1599 PetscBool used_vertex; 1600 for (i=0;i<n_vertices;i++) { 1601 used_vertex = PETSC_FALSE; 1602 k = 0; 1603 while (!used_vertex && k<nnsp_size) { 1604 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1605 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1606 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1607 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1608 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1609 total_counts++; 1610 used_vertex = PETSC_TRUE; 1611 } 1612 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1613 k++; 1614 } 1615 } 1616 } 1617 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1618 n_vertices = total_counts; 1619 } 1620 1621 /* edges and faces */ 1622 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1623 if (i<n_ISForEdges) { 1624 used_IS = &ISForEdges[i]; 1625 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1626 } else { 1627 used_IS = &ISForFaces[i-n_ISForEdges]; 1628 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1629 } 1630 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1631 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1632 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1633 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1634 /* change of basis should not be performed on local periodic nodes */ 1635 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1636 if (nnsp_has_cnst) { 1637 PetscScalar quad_value; 1638 temp_constraints++; 1639 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1640 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1641 for (j=0;j<size_of_constraint;j++) { 1642 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1643 } 1644 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1645 total_counts++; 1646 } 1647 for (k=0;k<nnsp_size;k++) { 1648 PetscReal real_value; 1649 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1650 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1651 for (j=0;j<size_of_constraint;j++) { 1652 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1653 } 1654 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1655 /* check if array is null on the connected component */ 1656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1657 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1658 if (real_value > 0.0) { /* keep indices and values */ 1659 temp_constraints++; 1660 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1661 total_counts++; 1662 } 1663 } 1664 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1665 valid_constraints = temp_constraints; 1666 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1667 if (!pcbddc->use_nnsp_true && temp_constraints) { 1668 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1669 1670 #if defined(PETSC_MISSING_LAPACK_GESVD) 1671 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1672 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1673 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1674 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1675 from that computed using LAPACKgesvd 1676 -> This is due to a different computation of eigenvectors in LAPACKheev 1677 -> The quality of the POD-computed basis will be the same */ 1678 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1679 /* Store upper triangular part of correlation matrix */ 1680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1681 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1682 for (j=0;j<temp_constraints;j++) { 1683 for (k=0;k<j+1;k++) { 1684 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1685 } 1686 } 1687 /* compute eigenvalues and eigenvectors of correlation matrix */ 1688 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1689 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1690 #if !defined(PETSC_USE_COMPLEX) 1691 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1692 #else 1693 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1694 #endif 1695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1697 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1698 j=0; 1699 while (j < temp_constraints && singular_vals[j] < tol) j++; 1700 total_counts=total_counts-j; 1701 valid_constraints = temp_constraints-j; 1702 /* scale and copy POD basis into used quadrature memory */ 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1709 if (j<temp_constraints) { 1710 PetscInt ii; 1711 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1713 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1715 for (k=0;k<temp_constraints-j;k++) { 1716 for (ii=0;ii<size_of_constraint;ii++) { 1717 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1718 } 1719 } 1720 } 1721 #else /* on missing GESVD */ 1722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1726 #if !defined(PETSC_USE_COMPLEX) 1727 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1728 #else 1729 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1730 #endif 1731 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1733 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1734 k = temp_constraints; 1735 if (k > size_of_constraint) k = size_of_constraint; 1736 j = 0; 1737 while (j < k && singular_vals[k-j-1] < tol) j++; 1738 total_counts = total_counts-temp_constraints+k-j; 1739 valid_constraints = k-j; 1740 #endif /* on missing GESVD */ 1741 } 1742 /* setting change_of_basis flag is safe now */ 1743 if (boolforchange) { 1744 for (j=0;j<valid_constraints;j++) { 1745 PetscBTSet(change_basis,total_counts-j-1); 1746 } 1747 } 1748 } 1749 /* free index sets of faces, edges and vertices */ 1750 for (i=0;i<n_ISForFaces;i++) { 1751 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1752 } 1753 if (n_ISForFaces) { 1754 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1755 } 1756 for (i=0;i<n_ISForEdges;i++) { 1757 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1758 } 1759 if (n_ISForEdges) { 1760 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1761 } 1762 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1763 /* map temp_indices_to_constraint in boundary numbering */ 1764 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1765 if (i != temp_indices[total_counts]) { 1766 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1767 } 1768 1769 /* free workspace */ 1770 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1771 ierr = PetscFree(work);CHKERRQ(ierr); 1772 #if defined(PETSC_USE_COMPLEX) 1773 ierr = PetscFree(rwork);CHKERRQ(ierr); 1774 #endif 1775 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1776 #if defined(PETSC_MISSING_LAPACK_GESVD) 1777 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1778 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1779 #endif 1780 } 1781 for (k=0;k<nnsp_size;k++) { 1782 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1783 } 1784 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1785 1786 /* set quantities in pcbddc data structure and store previous primal size */ 1787 /* n_vertices defines the number of subdomain corners in the primal space */ 1788 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1789 olocal_primal_size = pcbddc->local_primal_size; 1790 pcbddc->local_primal_size = total_counts; 1791 pcbddc->n_vertices = n_vertices; 1792 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1793 1794 /* Create constraint matrix */ 1795 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1796 /* so we need to set it up properly either with or without change of basis */ 1797 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1798 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1799 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1800 /* array to compute a local numbering of constraints : vertices first then constraints */ 1801 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1802 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1803 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1804 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1805 /* auxiliary stuff for basis change */ 1806 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1807 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1808 1809 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1810 total_primal_vertices=0; 1811 for (i=0;i<pcbddc->local_primal_size;i++) { 1812 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1813 if (size_of_constraint == 1) { 1814 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1815 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1816 aux_primal_minloc[total_primal_vertices]=0; 1817 total_primal_vertices++; 1818 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1819 PetscInt min_loc,min_index; 1820 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1821 /* find first untouched local node */ 1822 k = 0; 1823 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1824 min_index = global_indices[k]; 1825 min_loc = k; 1826 /* search the minimum among global nodes already untouched on the cc */ 1827 for (k=1;k<size_of_constraint;k++) { 1828 /* there can be more than one constraint on a single connected component */ 1829 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1830 min_index = global_indices[k]; 1831 min_loc = k; 1832 } 1833 } 1834 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1835 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1836 aux_primal_minloc[total_primal_vertices]=min_loc; 1837 total_primal_vertices++; 1838 } 1839 } 1840 /* determine if a QR strategy is needed for change of basis */ 1841 qr_needed = PETSC_FALSE; 1842 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1843 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1844 if (PetscBTLookup(change_basis,i)) { 1845 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1846 j = 0; 1847 for (k=0;k<size_of_constraint;k++) { 1848 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1849 j++; 1850 } 1851 } 1852 /* found more than one primal dof on the cc */ 1853 if (j > 1) { 1854 PetscBTSet(qr_needed_idx,i); 1855 qr_needed = PETSC_TRUE; 1856 } 1857 } 1858 } 1859 /* free workspace */ 1860 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1861 1862 /* permute indices in order to have a sorted set of vertices */ 1863 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1864 1865 /* nonzero structure of constraint matrix */ 1866 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1867 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1868 j=total_primal_vertices; 1869 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1870 if (!PetscBTLookup(change_basis,i)) { 1871 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1872 j++; 1873 } 1874 } 1875 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1876 ierr = PetscFree(nnz);CHKERRQ(ierr); 1877 /* set values in constraint matrix */ 1878 for (i=0;i<total_primal_vertices;i++) { 1879 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1880 } 1881 total_counts = total_primal_vertices; 1882 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1883 if (!PetscBTLookup(change_basis,i)) { 1884 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1885 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1886 total_counts++; 1887 } 1888 } 1889 /* assembling */ 1890 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1891 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1892 /* 1893 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1894 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1895 */ 1896 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1897 if (pcbddc->use_change_of_basis) { 1898 /* dual and primal dofs on a single cc */ 1899 PetscInt dual_dofs,primal_dofs; 1900 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1901 PetscInt primal_counter; 1902 /* working stuff for GEQRF */ 1903 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1904 PetscBLASInt lqr_work; 1905 /* working stuff for UNGQR */ 1906 PetscScalar *gqr_work,lgqr_work_t; 1907 PetscBLASInt lgqr_work; 1908 /* working stuff for TRTRS */ 1909 PetscScalar *trs_rhs; 1910 PetscBLASInt Blas_NRHS; 1911 /* pointers for values insertion into change of basis matrix */ 1912 PetscInt *start_rows,*start_cols; 1913 PetscScalar *start_vals; 1914 /* working stuff for values insertion */ 1915 PetscBT is_primal; 1916 1917 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1918 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1919 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1920 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1921 /* work arrays */ 1922 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1923 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1924 /* nonzeros per row */ 1925 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1926 if (PetscBTLookup(change_basis,i)) { 1927 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1928 if (PetscBTLookup(qr_needed_idx,i)) { 1929 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1930 } else { 1931 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1932 /* get local primal index on the cc */ 1933 j = 0; 1934 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1935 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1936 } 1937 } 1938 } 1939 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1940 ierr = PetscFree(nnz);CHKERRQ(ierr); 1941 /* Set initial identity in the matrix */ 1942 for (i=0;i<pcis->n_B;i++) { 1943 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1944 } 1945 1946 if (pcbddc->dbg_flag) { 1947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1949 } 1950 1951 1952 /* Now we loop on the constraints which need a change of basis */ 1953 /* 1954 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1955 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1956 1957 Basic blocks of change of basis matrix T computed 1958 1959 - Using the following block transformation if there is only a primal dof on the cc 1960 (in the example, primal dof is the last one of the edge in LOCAL ordering 1961 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1962 | 1 0 ... 0 1 | 1963 | 0 1 ... 0 1 | 1964 | ... | 1965 | 0 ... 1 1 | 1966 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1967 1968 - via QR decomposition of constraints otherwise 1969 */ 1970 if (qr_needed) { 1971 /* space to store Q */ 1972 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1973 /* first we issue queries for optimal work */ 1974 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1976 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1977 lqr_work = -1; 1978 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1980 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1981 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1982 lgqr_work = -1; 1983 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1985 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1986 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1987 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1988 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1989 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1990 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1991 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1992 /* array to store scaling factors for reflectors */ 1993 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1994 /* array to store rhs and solution of triangular solver */ 1995 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1996 /* allocating workspace for check */ 1997 if (pcbddc->dbg_flag) { 1998 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1999 } 2000 } 2001 /* array to store whether a node is primal or not */ 2002 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2003 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2004 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2005 if (i != total_primal_vertices) { 2006 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2007 } 2008 for (i=0;i<total_primal_vertices;i++) { 2009 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2010 } 2011 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2012 2013 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2014 /* -> using implicit ordering contained in temp_indices data */ 2015 total_counts = pcbddc->n_vertices; 2016 primal_counter = total_counts; 2017 while (total_counts<pcbddc->local_primal_size) { 2018 primal_dofs = 1; 2019 if (PetscBTLookup(change_basis,total_counts)) { 2020 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2021 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2022 primal_dofs++; 2023 } 2024 /* get constraint info */ 2025 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2026 dual_dofs = size_of_constraint-primal_dofs; 2027 2028 if (pcbddc->dbg_flag) { 2029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2030 } 2031 2032 if (primal_dofs > 1) { /* QR */ 2033 2034 /* copy quadrature constraints for change of basis check */ 2035 if (pcbddc->dbg_flag) { 2036 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2037 } 2038 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2039 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2040 2041 /* compute QR decomposition of constraints */ 2042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2045 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2046 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2047 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2049 2050 /* explictly compute R^-T */ 2051 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2052 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2057 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2058 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2059 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2061 2062 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2065 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2067 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2068 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2069 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2070 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2071 2072 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2073 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2074 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2076 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2081 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2082 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2083 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2084 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2085 2086 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2087 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2088 /* insert cols for primal dofs */ 2089 for (j=0;j<primal_dofs;j++) { 2090 start_vals = &qr_basis[j*size_of_constraint]; 2091 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2092 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2093 } 2094 /* insert cols for dual dofs */ 2095 for (j=0,k=0;j<dual_dofs;k++) { 2096 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2097 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2098 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2099 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2100 j++; 2101 } 2102 } 2103 2104 /* check change of basis */ 2105 if (pcbddc->dbg_flag) { 2106 PetscInt ii,jj; 2107 PetscBool valid_qr=PETSC_TRUE; 2108 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2115 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2116 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2117 for (jj=0;jj<size_of_constraint;jj++) { 2118 for (ii=0;ii<primal_dofs;ii++) { 2119 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2120 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2121 } 2122 } 2123 if (!valid_qr) { 2124 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2125 for (jj=0;jj<size_of_constraint;jj++) { 2126 for (ii=0;ii<primal_dofs;ii++) { 2127 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2128 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2129 } 2130 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2131 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2132 } 2133 } 2134 } 2135 } else { 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2137 } 2138 } 2139 } else { /* simple transformation block */ 2140 PetscInt row,col; 2141 PetscScalar val; 2142 for (j=0;j<size_of_constraint;j++) { 2143 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2144 if (!PetscBTLookup(is_primal,row)) { 2145 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2146 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2147 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2148 } else { 2149 for (k=0;k<size_of_constraint;k++) { 2150 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2151 if (row != col) { 2152 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2153 } else { 2154 val = 1.0; 2155 } 2156 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2157 } 2158 } 2159 } 2160 if (pcbddc->dbg_flag) { 2161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2162 } 2163 } 2164 /* increment primal counter */ 2165 primal_counter += primal_dofs; 2166 } else { 2167 if (pcbddc->dbg_flag) { 2168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2169 } 2170 } 2171 /* increment constraint counter total_counts */ 2172 total_counts += primal_dofs; 2173 } 2174 2175 /* free workspace */ 2176 if (qr_needed) { 2177 if (pcbddc->dbg_flag) { 2178 ierr = PetscFree(work);CHKERRQ(ierr); 2179 } 2180 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2181 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2182 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2183 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2184 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2185 } 2186 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2187 /* assembling */ 2188 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2189 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2190 /* 2191 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2192 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2193 */ 2194 } 2195 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2196 if (pcbddc->user_ChangeOfBasisMatrix) { 2197 PetscInt rows,cols; 2198 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2199 if (rows == pcis->n && cols == pcis->n) { 2200 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2201 } else { 2202 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2203 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2204 } 2205 } 2206 2207 /* get indices in local ordering for vertices and constraints */ 2208 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2209 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2210 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2211 } 2212 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2213 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2214 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2215 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2216 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2217 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2218 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2219 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2220 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2221 /* set quantities in PCBDDC data struct */ 2222 pcbddc->n_actual_vertices = i; 2223 /* check if a new primal space has been introduced */ 2224 pcbddc->new_primal_space_local = PETSC_TRUE; 2225 if (olocal_primal_size == pcbddc->local_primal_size) { 2226 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2227 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2228 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2229 } 2230 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2231 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2232 2233 /* flush dbg viewer */ 2234 if (pcbddc->dbg_flag) { 2235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2236 } 2237 2238 /* free workspace */ 2239 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2240 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2241 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2242 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2243 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2244 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2245 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2246 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2252 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2253 { 2254 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2255 PC_IS *pcis = (PC_IS*)pc->data; 2256 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2257 PetscInt ierr,i,vertex_size; 2258 PetscViewer viewer=pcbddc->dbg_viewer; 2259 2260 PetscFunctionBegin; 2261 /* Reset previously computed graph */ 2262 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2263 /* Init local Graph struct */ 2264 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2265 2266 /* Check validity of the csr graph passed in by the user */ 2267 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2268 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2269 } 2270 2271 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2272 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2273 Mat mat_adj; 2274 const PetscInt *xadj,*adjncy; 2275 PetscBool flg_row=PETSC_TRUE; 2276 2277 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2278 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2279 if (!flg_row) { 2280 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2281 } 2282 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2283 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2284 if (!flg_row) { 2285 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2286 } 2287 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2288 } 2289 2290 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2291 vertex_size = 1; 2292 if (pcbddc->user_provided_isfordofs) { 2293 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2294 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2295 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2296 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2297 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2298 } 2299 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2300 pcbddc->n_ISForDofs = 0; 2301 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2302 } 2303 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2304 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2305 } else { 2306 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2307 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2308 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2309 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2310 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2311 } 2312 } 2313 } 2314 2315 /* Setup of Graph */ 2316 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2317 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2318 } 2319 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2320 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2321 } 2322 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2323 2324 /* Graph's connected components analysis */ 2325 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2326 2327 /* print some info to stdout */ 2328 if (pcbddc->dbg_flag) { 2329 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2330 } 2331 2332 /* mark topography has done */ 2333 pcbddc->recompute_topography = PETSC_FALSE; 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2339 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2340 { 2341 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2342 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2343 PetscErrorCode ierr; 2344 2345 PetscFunctionBegin; 2346 n = 0; 2347 vertices = 0; 2348 if (pcbddc->ConstraintMatrix) { 2349 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2350 for (i=0;i<local_primal_size;i++) { 2351 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2352 if (size_of_constraint == 1) n++; 2353 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2354 } 2355 if (vertices_idx) { 2356 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2357 n = 0; 2358 for (i=0;i<local_primal_size;i++) { 2359 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2360 if (size_of_constraint == 1) { 2361 vertices[n++]=row_cmat_indices[0]; 2362 } 2363 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2364 } 2365 } 2366 } 2367 *n_vertices = n; 2368 if (vertices_idx) *vertices_idx = vertices; 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2374 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2375 { 2376 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2377 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2378 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2379 PetscBT touched; 2380 PetscErrorCode ierr; 2381 2382 /* This function assumes that the number of local constraints per connected component 2383 is not greater than the number of nodes defined for the connected component 2384 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2385 PetscFunctionBegin; 2386 n = 0; 2387 constraints_index = 0; 2388 if (pcbddc->ConstraintMatrix) { 2389 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2390 max_size_of_constraint = 0; 2391 for (i=0;i<local_primal_size;i++) { 2392 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2393 if (size_of_constraint > 1) { 2394 n++; 2395 } 2396 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2397 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2398 } 2399 if (constraints_idx) { 2400 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2401 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2402 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2403 n = 0; 2404 for (i=0;i<local_primal_size;i++) { 2405 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2406 if (size_of_constraint > 1) { 2407 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2408 /* find first untouched local node */ 2409 j = 0; 2410 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2411 min_index = row_cmat_global_indices[j]; 2412 min_loc = j; 2413 /* search the minimum among nodes not yet touched on the connected component 2414 since there can be more than one constraint on a single cc */ 2415 for (j=1;j<size_of_constraint;j++) { 2416 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2417 min_index = row_cmat_global_indices[j]; 2418 min_loc = j; 2419 } 2420 } 2421 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2422 constraints_index[n++] = row_cmat_indices[min_loc]; 2423 } 2424 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2425 } 2426 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2427 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2428 } 2429 } 2430 *n_constraints = n; 2431 if (constraints_idx) *constraints_idx = constraints_index; 2432 PetscFunctionReturn(0); 2433 } 2434 2435 /* the next two functions has been adapted from pcis.c */ 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "PCBDDCApplySchur" 2438 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2439 { 2440 PetscErrorCode ierr; 2441 PC_IS *pcis = (PC_IS*)(pc->data); 2442 2443 PetscFunctionBegin; 2444 if (!vec2_B) { vec2_B = v; } 2445 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2446 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2447 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2448 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2449 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2455 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2456 { 2457 PetscErrorCode ierr; 2458 PC_IS *pcis = (PC_IS*)(pc->data); 2459 2460 PetscFunctionBegin; 2461 if (!vec2_B) { vec2_B = v; } 2462 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2463 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2464 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2465 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2466 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "PCBDDCSubsetNumbering" 2472 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2473 { 2474 Vec local_vec,global_vec; 2475 IS seqis,paris; 2476 VecScatter scatter_ctx; 2477 PetscScalar *array; 2478 PetscInt *temp_global_dofs; 2479 PetscScalar globalsum; 2480 PetscInt i,j,s; 2481 PetscInt nlocals,first_index,old_index,max_local; 2482 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2483 PetscMPIInt *dof_sizes,*dof_displs; 2484 PetscBool first_found; 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 /* mpi buffers */ 2489 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2490 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2491 j = ( !rank_prec_comm ? size_prec_comm : 0); 2492 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2494 /* get maximum size of subset */ 2495 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2496 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2497 max_local = 0; 2498 for (i=0;i<n_local_dofs;i++) { 2499 if (max_local < temp_global_dofs[i] ) { 2500 max_local = temp_global_dofs[i]; 2501 } 2502 } 2503 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2504 max_global++; 2505 max_local = 0; 2506 for (i=0;i<n_local_dofs;i++) { 2507 if (max_local < local_dofs[i] ) { 2508 max_local = local_dofs[i]; 2509 } 2510 } 2511 max_local++; 2512 /* allocate workspace */ 2513 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2514 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2515 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2516 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2517 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2518 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2519 /* create scatter */ 2520 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2521 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2522 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2523 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2524 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2525 /* init array */ 2526 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2527 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2528 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2529 if (local_dofs_mult) { 2530 for (i=0;i<n_local_dofs;i++) { 2531 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2532 } 2533 } else { 2534 for (i=0;i<n_local_dofs;i++) { 2535 array[local_dofs[i]]=1.0; 2536 } 2537 } 2538 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2539 /* scatter into global vec and get total number of global dofs */ 2540 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2541 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2542 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2543 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2544 /* Fill global_vec with cumulative function for global numbering */ 2545 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2546 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2547 nlocals = 0; 2548 first_index = -1; 2549 first_found = PETSC_FALSE; 2550 for (i=0;i<s;i++) { 2551 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2552 first_found = PETSC_TRUE; 2553 first_index = i; 2554 } 2555 nlocals += (PetscInt)PetscRealPart(array[i]); 2556 } 2557 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2558 if (!rank_prec_comm) { 2559 dof_displs[0]=0; 2560 for (i=1;i<size_prec_comm;i++) { 2561 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2562 } 2563 } 2564 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2565 if (first_found) { 2566 array[first_index] += (PetscScalar)nlocals; 2567 old_index = first_index; 2568 for (i=first_index+1;i<s;i++) { 2569 if (PetscRealPart(array[i]) > 0.1) { 2570 array[i] += array[old_index]; 2571 old_index = i; 2572 } 2573 } 2574 } 2575 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2576 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2577 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2578 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2579 /* get global ordering of local dofs */ 2580 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2581 if (local_dofs_mult) { 2582 for (i=0;i<n_local_dofs;i++) { 2583 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2584 } 2585 } else { 2586 for (i=0;i<n_local_dofs;i++) { 2587 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2588 } 2589 } 2590 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2591 /* free workspace */ 2592 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2593 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2594 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2595 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2596 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2597 /* return pointer to global ordering of local dofs */ 2598 *global_numbering_subset = temp_global_dofs; 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2604 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2605 { 2606 PetscInt i,j; 2607 PetscScalar *alphas; 2608 PetscErrorCode ierr; 2609 2610 PetscFunctionBegin; 2611 /* this implements stabilized Gram-Schmidt */ 2612 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2613 for (i=0;i<n;i++) { 2614 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2615 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2616 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2617 } 2618 ierr = PetscFree(alphas);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 #undef __FUNCT__ 2623 #define __FUNCT__ "MatISGetSubassemblingPattern" 2624 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2625 { 2626 Mat subdomain_adj; 2627 IS new_ranks,ranks_send_to; 2628 MatPartitioning partitioner; 2629 Mat_IS *matis; 2630 PetscInt n_neighs,*neighs,*n_shared,**shared; 2631 PetscInt prank; 2632 PetscMPIInt size,rank,color; 2633 PetscInt *xadj,*adjncy,*oldranks; 2634 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2635 PetscInt i,j,local_size,threshold=0; 2636 PetscErrorCode ierr; 2637 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2638 PetscSubcomm subcomm; 2639 2640 PetscFunctionBegin; 2641 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2642 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2643 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2644 2645 /* Get info on mapping */ 2646 matis = (Mat_IS*)(mat->data); 2647 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2648 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2649 2650 /* build local CSR graph of subdomains' connectivity */ 2651 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2652 xadj[0] = 0; 2653 xadj[1] = PetscMax(n_neighs-1,0); 2654 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2655 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2656 2657 if (threshold) { 2658 PetscInt* count,min_threshold; 2659 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2660 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2661 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2662 for (j=0;j<n_shared[i];j++) { 2663 count[shared[i][j]] += 1; 2664 } 2665 } 2666 /* adapt threshold since we dont want to lose significant connections */ 2667 min_threshold = n_neighs; 2668 for (i=1;i<n_neighs;i++) { 2669 for (j=0;j<n_shared[i];j++) { 2670 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2671 } 2672 } 2673 threshold = PetscMax(min_threshold+1,threshold); 2674 xadj[1] = 0; 2675 for (i=1;i<n_neighs;i++) { 2676 for (j=0;j<n_shared[i];j++) { 2677 if (count[shared[i][j]] < threshold) { 2678 adjncy[xadj[1]] = neighs[i]; 2679 adjncy_wgt[xadj[1]] = n_shared[i]; 2680 xadj[1]++; 2681 break; 2682 } 2683 } 2684 } 2685 ierr = PetscFree(count);CHKERRQ(ierr); 2686 } else { 2687 if (xadj[1]) { 2688 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2689 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2690 } 2691 } 2692 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2693 if (use_square) { 2694 for (i=0;i<xadj[1];i++) { 2695 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2696 } 2697 } 2698 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2699 2700 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2701 2702 /* 2703 Restrict work on active processes only. 2704 */ 2705 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2706 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2707 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2708 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2709 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2710 if (color) { 2711 ierr = PetscFree(xadj);CHKERRQ(ierr); 2712 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2713 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2714 } else { 2715 PetscInt coarsening_ratio; 2716 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2717 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2718 prank = rank; 2719 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2720 /* 2721 for (i=0;i<size;i++) { 2722 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2723 } 2724 */ 2725 for (i=0;i<xadj[1];i++) { 2726 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2727 } 2728 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2729 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2730 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2731 2732 /* Partition */ 2733 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2734 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2735 if (use_vwgt) { 2736 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2737 v_wgt[0] = local_size; 2738 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2739 } 2740 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2741 coarsening_ratio = size/n_subdomains; 2742 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2743 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2744 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2745 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2746 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2747 2748 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2749 if (contiguous) { 2750 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2751 } else { 2752 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2753 } 2754 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2755 /* clean up */ 2756 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2757 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2758 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2759 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2760 } 2761 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2762 2763 /* assemble parallel IS for sends */ 2764 i = 1; 2765 if (color) i=0; 2766 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2767 2768 /* get back IS */ 2769 *is_sends = ranks_send_to; 2770 PetscFunctionReturn(0); 2771 } 2772 2773 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2774 2775 #undef __FUNCT__ 2776 #define __FUNCT__ "MatISSubassemble" 2777 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2778 { 2779 Mat local_mat; 2780 Mat_IS *matis; 2781 IS is_sends_internal; 2782 PetscInt rows,cols; 2783 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2784 PetscBool ismatis,isdense,destroy_mat; 2785 ISLocalToGlobalMapping l2gmap; 2786 PetscInt* l2gmap_indices; 2787 const PetscInt* is_indices; 2788 MatType new_local_type; 2789 /* buffers */ 2790 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2791 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2792 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2793 /* MPI */ 2794 MPI_Comm comm,comm_n; 2795 PetscSubcomm subcomm; 2796 PetscMPIInt n_sends,n_recvs,commsize; 2797 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2798 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2799 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2800 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2801 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 /* TODO: add missing checks */ 2806 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2807 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2808 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2809 PetscValidLogicalCollectiveInt(mat,nis,7); 2810 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2811 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2812 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2813 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2814 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2815 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2816 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2817 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2818 PetscInt mrows,mcols,mnrows,mncols; 2819 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2820 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2821 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2822 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2823 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2824 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2825 } 2826 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2827 PetscValidLogicalCollectiveInt(mat,bs,0); 2828 /* prepare IS for sending if not provided */ 2829 if (!is_sends) { 2830 PetscBool pcontig = PETSC_TRUE; 2831 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2832 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2833 } else { 2834 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2835 is_sends_internal = is_sends; 2836 } 2837 2838 /* get pointer of MATIS data */ 2839 matis = (Mat_IS*)mat->data; 2840 2841 /* get comm */ 2842 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2843 2844 /* compute number of sends */ 2845 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2846 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2847 2848 /* compute number of receives */ 2849 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2850 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2851 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2852 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2853 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2854 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2855 ierr = PetscFree(iflags);CHKERRQ(ierr); 2856 2857 /* restrict comm if requested */ 2858 subcomm = 0; 2859 destroy_mat = PETSC_FALSE; 2860 if (restrict_comm) { 2861 PetscMPIInt color,rank,subcommsize; 2862 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2863 color = 0; 2864 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2865 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2866 subcommsize = commsize - subcommsize; 2867 /* check if reuse has been requested */ 2868 if (reuse == MAT_REUSE_MATRIX) { 2869 if (*mat_n) { 2870 PetscMPIInt subcommsize2; 2871 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 2872 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 2873 comm_n = PetscObjectComm((PetscObject)*mat_n); 2874 } else { 2875 comm_n = PETSC_COMM_SELF; 2876 } 2877 } else { /* MAT_INITIAL_MATRIX */ 2878 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 2879 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 2880 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2881 comm_n = subcomm->comm; 2882 } 2883 /* flag to destroy *mat_n if not significative */ 2884 if (color) destroy_mat = PETSC_TRUE; 2885 } else { 2886 comm_n = comm; 2887 } 2888 2889 /* prepare send/receive buffers */ 2890 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 2891 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2892 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 2893 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2894 if (nis) { 2895 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 2896 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 2897 } 2898 2899 /* Get data from local matrices */ 2900 if (!isdense) { 2901 /* TODO: See below some guidelines on how to prepare the local buffers */ 2902 /* 2903 send_buffer_vals should contain the raw values of the local matrix 2904 send_buffer_idxs should contain: 2905 - MatType_PRIVATE type 2906 - PetscInt size_of_l2gmap 2907 - PetscInt global_row_indices[size_of_l2gmap] 2908 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2909 */ 2910 } else { 2911 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2912 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2913 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 2914 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2915 send_buffer_idxs[1] = i; 2916 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2917 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2918 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2919 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2920 for (i=0;i<n_sends;i++) { 2921 ilengths_vals[is_indices[i]] = len*len; 2922 ilengths_idxs[is_indices[i]] = len+2; 2923 } 2924 } 2925 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2926 /* additional is (if any) */ 2927 if (nis) { 2928 PetscMPIInt psum; 2929 PetscInt j; 2930 for (j=0,psum=0;j<nis;j++) { 2931 PetscInt plen; 2932 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2933 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 2934 psum += len+1; /* indices + lenght */ 2935 } 2936 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 2937 for (j=0,psum=0;j<nis;j++) { 2938 PetscInt plen; 2939 const PetscInt *is_array_idxs; 2940 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 2941 send_buffer_idxs_is[psum] = plen; 2942 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2943 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 2944 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 2945 psum += plen+1; /* indices + lenght */ 2946 } 2947 for (i=0;i<n_sends;i++) { 2948 ilengths_idxs_is[is_indices[i]] = psum; 2949 } 2950 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 2951 } 2952 2953 buf_size_idxs = 0; 2954 buf_size_vals = 0; 2955 buf_size_idxs_is = 0; 2956 for (i=0;i<n_recvs;i++) { 2957 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2958 buf_size_vals += (PetscInt)olengths_vals[i]; 2959 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 2960 } 2961 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 2962 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 2963 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 2964 2965 /* get new tags for clean communications */ 2966 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2967 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2968 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 2969 2970 /* allocate for requests */ 2971 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 2972 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 2973 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 2974 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 2975 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 2976 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 2977 2978 /* communications */ 2979 ptr_idxs = recv_buffer_idxs; 2980 ptr_vals = recv_buffer_vals; 2981 ptr_idxs_is = recv_buffer_idxs_is; 2982 for (i=0;i<n_recvs;i++) { 2983 source_dest = onodes[i]; 2984 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2985 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2986 ptr_idxs += olengths_idxs[i]; 2987 ptr_vals += olengths_vals[i]; 2988 if (nis) { 2989 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 2990 ptr_idxs_is += olengths_idxs_is[i]; 2991 } 2992 } 2993 for (i=0;i<n_sends;i++) { 2994 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2995 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2996 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2997 if (nis) { 2998 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 2999 } 3000 } 3001 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3002 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3003 3004 /* assemble new l2g map */ 3005 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3006 ptr_idxs = recv_buffer_idxs; 3007 buf_size_idxs = 0; 3008 for (i=0;i<n_recvs;i++) { 3009 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3010 ptr_idxs += olengths_idxs[i]; 3011 } 3012 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3013 ptr_idxs = recv_buffer_idxs; 3014 buf_size_idxs = 0; 3015 for (i=0;i<n_recvs;i++) { 3016 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3017 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3018 ptr_idxs += olengths_idxs[i]; 3019 } 3020 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3021 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3022 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3023 3024 /* infer new local matrix type from received local matrices type */ 3025 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3026 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3027 if (n_recvs) { 3028 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3029 ptr_idxs = recv_buffer_idxs; 3030 for (i=0;i<n_recvs;i++) { 3031 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3032 new_local_type_private = MATAIJ_PRIVATE; 3033 break; 3034 } 3035 ptr_idxs += olengths_idxs[i]; 3036 } 3037 switch (new_local_type_private) { 3038 case MATDENSE_PRIVATE: 3039 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3040 new_local_type = MATSEQAIJ; 3041 bs = 1; 3042 } else { /* if I receive only 1 dense matrix */ 3043 new_local_type = MATSEQDENSE; 3044 bs = 1; 3045 } 3046 break; 3047 case MATAIJ_PRIVATE: 3048 new_local_type = MATSEQAIJ; 3049 bs = 1; 3050 break; 3051 case MATBAIJ_PRIVATE: 3052 new_local_type = MATSEQBAIJ; 3053 break; 3054 case MATSBAIJ_PRIVATE: 3055 new_local_type = MATSEQSBAIJ; 3056 break; 3057 default: 3058 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3059 break; 3060 } 3061 } else { /* by default, new_local_type is seqdense */ 3062 new_local_type = MATSEQDENSE; 3063 bs = 1; 3064 } 3065 3066 /* create MATIS object if needed */ 3067 if (reuse == MAT_INITIAL_MATRIX) { 3068 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3069 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3070 } else { 3071 /* it also destroys the local matrices */ 3072 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3073 } 3074 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3075 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3076 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3077 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3078 3079 /* set values */ 3080 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3081 ptr_vals = recv_buffer_vals; 3082 ptr_idxs = recv_buffer_idxs; 3083 for (i=0;i<n_recvs;i++) { 3084 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3085 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3086 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3087 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3088 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3089 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3090 } else { 3091 /* TODO */ 3092 } 3093 ptr_idxs += olengths_idxs[i]; 3094 ptr_vals += olengths_vals[i]; 3095 } 3096 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3097 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3098 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3100 3101 #if 0 3102 if (!restrict_comm) { /* check */ 3103 Vec lvec,rvec; 3104 PetscReal infty_error; 3105 3106 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3107 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3108 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3109 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3110 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3111 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3112 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3113 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3114 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3115 } 3116 #endif 3117 3118 /* assemble new additional is (if any) */ 3119 if (nis) { 3120 PetscInt **temp_idxs,*count_is,j,psum; 3121 3122 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3123 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3124 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3125 ptr_idxs = recv_buffer_idxs_is; 3126 psum = 0; 3127 for (i=0;i<n_recvs;i++) { 3128 for (j=0;j<nis;j++) { 3129 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3130 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3131 psum += plen; 3132 ptr_idxs += plen+1; /* shift pointer to received data */ 3133 } 3134 } 3135 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3136 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3137 for (i=1;i<nis;i++) { 3138 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3139 } 3140 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3141 ptr_idxs = recv_buffer_idxs_is; 3142 for (i=0;i<n_recvs;i++) { 3143 for (j=0;j<nis;j++) { 3144 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3145 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3146 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3147 ptr_idxs += plen+1; /* shift pointer to received data */ 3148 } 3149 } 3150 for (i=0;i<nis;i++) { 3151 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3152 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3153 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3154 } 3155 ierr = PetscFree(count_is);CHKERRQ(ierr); 3156 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3157 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3158 } 3159 /* free workspace */ 3160 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3161 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3162 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3163 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3164 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3165 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3166 if (isdense) { 3167 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3168 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3169 } else { 3170 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3171 } 3172 if (nis) { 3173 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3174 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3175 } 3176 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3177 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3178 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3179 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3180 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3181 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3182 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3183 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3184 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3185 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3186 ierr = PetscFree(onodes);CHKERRQ(ierr); 3187 if (nis) { 3188 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3189 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3190 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3191 } 3192 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3193 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3194 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3195 for (i=0;i<nis;i++) { 3196 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3197 } 3198 } 3199 PetscFunctionReturn(0); 3200 } 3201 3202 /* temporary hack into ksp private data structure */ 3203 #include <petsc-private/kspimpl.h> 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3207 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3208 { 3209 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3210 PC_IS *pcis = (PC_IS*)pc->data; 3211 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3212 MatNullSpace CoarseNullSpace=NULL; 3213 ISLocalToGlobalMapping coarse_islg; 3214 IS coarse_is,*isarray; 3215 PetscInt i,im_active=-1,active_procs=-1; 3216 PetscInt nis,nisdofs,nisneu; 3217 PC pc_temp; 3218 PCType coarse_pc_type; 3219 KSPType coarse_ksp_type; 3220 PetscBool multilevel_requested,multilevel_allowed; 3221 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3222 Mat t_coarse_mat_is; 3223 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3224 PetscMPIInt all_procs; 3225 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3226 PetscBool compute_vecs = PETSC_FALSE; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 /* Assign global numbering to coarse dofs */ 3231 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3232 compute_vecs = PETSC_TRUE; 3233 PetscInt ocoarse_size; 3234 ocoarse_size = pcbddc->coarse_size; 3235 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3236 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3237 /* see if we can avoid some work */ 3238 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3239 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3240 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3241 coarse_reuse = PETSC_FALSE; 3242 } else { /* we can safely reuse already computed coarse matrix */ 3243 coarse_reuse = PETSC_TRUE; 3244 } 3245 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3246 coarse_reuse = PETSC_FALSE; 3247 } 3248 /* reset any subassembling information */ 3249 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3250 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3251 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3252 coarse_reuse = PETSC_TRUE; 3253 } 3254 3255 /* count "active" (i.e. with positive local size) and "void" processes */ 3256 im_active = !!(pcis->n); 3257 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3258 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3259 void_procs = all_procs-active_procs; 3260 csin_type_simple = PETSC_TRUE; 3261 if (pcbddc->current_level) { 3262 csin_ml = PETSC_TRUE; 3263 ncoarse_ml = void_procs; 3264 csin_ds = PETSC_TRUE; 3265 ncoarse_ds = void_procs; 3266 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3267 } else { 3268 csin_ml = PETSC_FALSE; 3269 ncoarse_ml = all_procs; 3270 if (void_procs) { 3271 csin_ds = PETSC_TRUE; 3272 ncoarse_ds = void_procs; 3273 csin_type_simple = PETSC_FALSE; 3274 } else { 3275 csin_ds = PETSC_FALSE; 3276 ncoarse_ds = all_procs; 3277 } 3278 } 3279 3280 /* 3281 test if we can go multilevel: three conditions must be satisfied: 3282 - we have not exceeded the number of levels requested 3283 - we can actually subassemble the active processes 3284 - we can find a suitable number of MPI processes where we can place the subassembled problem 3285 */ 3286 multilevel_allowed = PETSC_FALSE; 3287 multilevel_requested = PETSC_FALSE; 3288 if (pcbddc->current_level < pcbddc->max_levels) { 3289 multilevel_requested = PETSC_TRUE; 3290 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3291 multilevel_allowed = PETSC_FALSE; 3292 } else { 3293 multilevel_allowed = PETSC_TRUE; 3294 } 3295 } 3296 /* determine number of process partecipating to coarse solver */ 3297 if (multilevel_allowed) { 3298 ncoarse = ncoarse_ml; 3299 csin = csin_ml; 3300 } else { 3301 ncoarse = ncoarse_ds; 3302 csin = csin_ds; 3303 } 3304 3305 /* creates temporary l2gmap and IS for coarse indexes */ 3306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3307 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3308 3309 /* creates temporary MATIS object for coarse matrix */ 3310 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3311 #if 0 3312 { 3313 PetscViewer viewer; 3314 char filename[256]; 3315 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3316 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3317 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3318 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3319 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3320 } 3321 #endif 3322 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3323 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3324 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3327 3328 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3329 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3330 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3331 const PetscInt *idxs; 3332 ISLocalToGlobalMapping tmap; 3333 3334 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3335 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3336 /* allocate space for temporary storage */ 3337 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3338 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3339 /* allocate for IS array */ 3340 nisdofs = pcbddc->n_ISForDofsLocal; 3341 nisneu = !!pcbddc->NeumannBoundariesLocal; 3342 nis = nisdofs + nisneu; 3343 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3344 /* dofs splitting */ 3345 for (i=0;i<nisdofs;i++) { 3346 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3347 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3348 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3349 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3350 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3351 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3352 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3353 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3354 } 3355 /* neumann boundaries */ 3356 if (pcbddc->NeumannBoundariesLocal) { 3357 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3358 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3359 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3360 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3361 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3362 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3363 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3364 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3365 } 3366 /* free memory */ 3367 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3368 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3369 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3370 } else { 3371 nis = 0; 3372 nisdofs = 0; 3373 nisneu = 0; 3374 isarray = NULL; 3375 } 3376 /* destroy no longer needed map */ 3377 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3378 3379 /* restrict on coarse candidates (if needed) */ 3380 coarse_mat_is = NULL; 3381 if (csin) { 3382 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3383 PetscInt j,tissize,*nisindices; 3384 PetscInt *coarse_candidates; 3385 const PetscInt* tisindices; 3386 /* get coarse candidates' ranks in pc communicator */ 3387 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3388 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3389 for (i=0,j=0;i<all_procs;i++) { 3390 if (!coarse_candidates[i]) { 3391 coarse_candidates[j]=i; 3392 j++; 3393 } 3394 } 3395 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3396 /* get a suitable subassembling pattern */ 3397 if (csin_type_simple) { 3398 PetscMPIInt rank; 3399 PetscInt issize,isidx; 3400 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3401 if (im_active) { 3402 issize = 1; 3403 isidx = (PetscInt)rank; 3404 } else { 3405 issize = 0; 3406 isidx = -1; 3407 } 3408 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3409 } else { 3410 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3411 } 3412 if (pcbddc->dbg_flag) { 3413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3415 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3417 for (i=0;i<j;i++) { 3418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3419 } 3420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3422 } 3423 /* shift the pattern on coarse candidates */ 3424 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3425 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3426 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3427 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3428 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3429 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3430 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3431 } 3432 if (pcbddc->dbg_flag) { 3433 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3435 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3437 } 3438 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3439 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3440 } else { 3441 if (pcbddc->dbg_flag) { 3442 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3444 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3445 } 3446 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3447 coarse_mat_is = t_coarse_mat_is; 3448 } 3449 3450 /* create local to global scatters for coarse problem */ 3451 if (compute_vecs) { 3452 PetscInt lrows; 3453 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3454 if (coarse_mat_is) { 3455 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3456 } else { 3457 lrows = 0; 3458 } 3459 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3460 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3461 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3462 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3463 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3464 } 3465 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3466 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3467 3468 /* set defaults for coarse KSP and PC */ 3469 if (multilevel_allowed) { 3470 coarse_ksp_type = KSPRICHARDSON; 3471 coarse_pc_type = PCBDDC; 3472 } else { 3473 coarse_ksp_type = KSPPREONLY; 3474 coarse_pc_type = PCREDUNDANT; 3475 } 3476 3477 /* print some info if requested */ 3478 if (pcbddc->dbg_flag) { 3479 if (!multilevel_allowed) { 3480 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3481 if (multilevel_requested) { 3482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3483 } else if (pcbddc->max_levels) { 3484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3485 } 3486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3487 } 3488 } 3489 3490 /* create the coarse KSP object only once with defaults */ 3491 if (coarse_mat_is) { 3492 MatReuse coarse_mat_reuse; 3493 PetscViewer dbg_viewer = NULL; 3494 if (pcbddc->dbg_flag) { 3495 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3496 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3497 } 3498 if (!pcbddc->coarse_ksp) { 3499 char prefix[256],str_level[16]; 3500 size_t len; 3501 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3502 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3503 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3504 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3505 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3506 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3507 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3508 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3509 /* prefix */ 3510 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3511 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3512 if (!pcbddc->current_level) { 3513 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3514 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3515 } else { 3516 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3517 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3518 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3519 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3520 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3521 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3522 } 3523 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3524 /* allow user customization */ 3525 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3526 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3527 } 3528 3529 /* get some info after set from options */ 3530 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3531 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3532 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3533 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3534 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3535 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3536 isbddc = PETSC_FALSE; 3537 } 3538 if (isredundant) { 3539 KSP inner_ksp; 3540 PC inner_pc; 3541 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3542 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3543 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3544 } 3545 3546 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3547 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3548 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3549 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3550 if (nisdofs) { 3551 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3552 for (i=0;i<nisdofs;i++) { 3553 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3554 } 3555 } 3556 if (nisneu) { 3557 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3558 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3559 } 3560 3561 /* assemble coarse matrix */ 3562 if (coarse_reuse) { 3563 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3564 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3565 coarse_mat_reuse = MAT_REUSE_MATRIX; 3566 } else { 3567 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3568 } 3569 if (isbddc || isnn) { 3570 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3571 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3572 if (pcbddc->dbg_flag) { 3573 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3574 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3575 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3576 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3577 } 3578 } 3579 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3580 } else { 3581 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3582 } 3583 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3584 3585 /* propagate symmetry info to coarse matrix */ 3586 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3587 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3588 3589 /* set operators */ 3590 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3591 if (pcbddc->dbg_flag) { 3592 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3593 } 3594 } else { /* processes non partecipating to coarse solver (if any) */ 3595 coarse_mat = 0; 3596 } 3597 ierr = PetscFree(isarray);CHKERRQ(ierr); 3598 #if 0 3599 { 3600 PetscViewer viewer; 3601 char filename[256]; 3602 sprintf(filename,"coarse_mat.m"); 3603 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3604 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3605 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3606 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3607 } 3608 #endif 3609 3610 /* Compute coarse null space (special handling by BDDC only) */ 3611 if (pcbddc->NullSpace) { 3612 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3613 } 3614 3615 if (pcbddc->coarse_ksp) { 3616 Vec crhs,csol; 3617 PetscBool ispreonly; 3618 if (CoarseNullSpace) { 3619 if (isbddc) { 3620 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3621 } else { 3622 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3623 } 3624 } 3625 /* setup coarse ksp */ 3626 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3627 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3628 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3629 /* hack */ 3630 if (!csol) { 3631 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3632 } 3633 if (!crhs) { 3634 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3635 } 3636 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3637 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3638 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3639 KSP check_ksp; 3640 KSPType check_ksp_type; 3641 PC check_pc; 3642 Vec check_vec,coarse_vec; 3643 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3644 PetscInt its; 3645 PetscBool compute_eigs; 3646 PetscReal *eigs_r,*eigs_c; 3647 PetscInt neigs; 3648 const char *prefix; 3649 3650 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3651 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3652 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3653 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3654 if (ispreonly) { 3655 check_ksp_type = KSPPREONLY; 3656 compute_eigs = PETSC_FALSE; 3657 } else { 3658 check_ksp_type = KSPGMRES; 3659 compute_eigs = PETSC_TRUE; 3660 } 3661 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3662 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3663 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3664 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3665 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3666 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3667 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3668 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3669 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3670 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3671 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3672 /* create random vec */ 3673 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3674 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3675 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3676 if (CoarseNullSpace) { 3677 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3678 } 3679 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3680 /* solve coarse problem */ 3681 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3682 if (CoarseNullSpace) { 3683 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3684 } 3685 /* set eigenvalue estimation if preonly has not been requested */ 3686 if (compute_eigs) { 3687 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3688 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3689 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3690 lambda_max = eigs_r[neigs-1]; 3691 lambda_min = eigs_r[0]; 3692 if (pcbddc->use_coarse_estimates) { 3693 if (lambda_max>lambda_min) { 3694 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3695 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3696 } 3697 } 3698 } 3699 3700 /* check coarse problem residual error */ 3701 if (pcbddc->dbg_flag) { 3702 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3703 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3704 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3705 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3706 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3707 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3708 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3709 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3710 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3711 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3712 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3713 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3714 if (compute_eigs) { 3715 PetscReal lambda_max_s,lambda_min_s; 3716 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3717 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3718 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3719 for (i=0;i<neigs;i++) { 3720 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3721 } 3722 } 3723 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3724 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3725 } 3726 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3727 if (compute_eigs) { 3728 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3729 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3730 } 3731 } 3732 } 3733 /* print additional info */ 3734 if (pcbddc->dbg_flag) { 3735 /* waits until all processes reaches this point */ 3736 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3737 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3738 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3739 } 3740 3741 /* free memory */ 3742 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3743 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3744 PetscFunctionReturn(0); 3745 } 3746 3747 #undef __FUNCT__ 3748 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3749 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3750 { 3751 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3752 PC_IS* pcis = (PC_IS*)pc->data; 3753 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3754 PetscInt i,coarse_size; 3755 PetscInt *local_primal_indices; 3756 PetscErrorCode ierr; 3757 3758 PetscFunctionBegin; 3759 /* Compute global number of coarse dofs */ 3760 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3761 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3762 } 3763 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3764 3765 /* check numbering */ 3766 if (pcbddc->dbg_flag) { 3767 PetscScalar coarsesum,*array; 3768 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3769 3770 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3771 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3773 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3774 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3775 for (i=0;i<pcbddc->local_primal_size;i++) { 3776 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3777 } 3778 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3779 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3780 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3781 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3782 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3783 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3784 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3785 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3786 for (i=0;i<pcis->n;i++) { 3787 if (array[i] == 1.0) { 3788 set_error = PETSC_TRUE; 3789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3790 } 3791 } 3792 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3794 for (i=0;i<pcis->n;i++) { 3795 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3796 } 3797 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3798 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3799 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3800 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3801 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3802 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3803 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3804 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3805 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3807 for (i=0;i<pcbddc->local_primal_size;i++) { 3808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3809 } 3810 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3811 } 3812 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3813 if (set_error_reduced) { 3814 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3815 } 3816 } 3817 /* get back data */ 3818 *coarse_size_n = coarse_size; 3819 *local_primal_indices_n = local_primal_indices; 3820 PetscFunctionReturn(0); 3821 } 3822 3823 #undef __FUNCT__ 3824 #define __FUNCT__ "PCBDDCGlobalToLocal" 3825 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3826 { 3827 IS localis_t; 3828 PetscInt i,lsize,*idxs,n; 3829 PetscScalar *vals; 3830 PetscErrorCode ierr; 3831 3832 PetscFunctionBegin; 3833 /* get indices in local ordering exploiting local to global map */ 3834 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3835 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3836 for (i=0;i<lsize;i++) vals[i] = 1.0; 3837 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3838 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3839 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3840 if (idxs) { /* multilevel guard */ 3841 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3842 } 3843 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3844 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3845 ierr = PetscFree(vals);CHKERRQ(ierr); 3846 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3847 /* now compute set in local ordering */ 3848 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3849 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3850 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3851 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3852 for (i=0,lsize=0;i<n;i++) { 3853 if (PetscRealPart(vals[i]) > 0.5) { 3854 lsize++; 3855 } 3856 } 3857 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3858 for (i=0,lsize=0;i<n;i++) { 3859 if (PetscRealPart(vals[i]) > 0.5) { 3860 idxs[lsize++] = i; 3861 } 3862 } 3863 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3864 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3865 *localis = localis_t; 3866 PetscFunctionReturn(0); 3867 } 3868