1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 116 { 117 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 118 PC_IS *pcis = (PC_IS*)pc->data; 119 VecType impVecType; 120 PetscInt n_constraints,n_R,old_size; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 if (!pcbddc->ConstraintMatrix) { 125 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 126 } 127 /* get sizes */ 128 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 129 n_R = pcis->n-pcbddc->n_actual_vertices; 130 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 131 /* local work vectors (try to avoid unneeded work)*/ 132 /* R nodes */ 133 old_size = -1; 134 if (pcbddc->vec1_R) { 135 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 136 } 137 if (n_R != old_size) { 138 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 139 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 140 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 142 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 143 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 144 } 145 /* local primal dofs */ 146 old_size = -1; 147 if (pcbddc->vec1_P) { 148 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 149 } 150 if (pcbddc->local_primal_size != old_size) { 151 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 152 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 154 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 155 } 156 /* local explicit constraints */ 157 old_size = -1; 158 if (pcbddc->vec1_C) { 159 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 160 } 161 if (n_constraints && n_constraints != old_size) { 162 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 163 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 165 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 166 } 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "PCBDDCSetUpCorrection" 172 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 173 { 174 PetscErrorCode ierr; 175 /* pointers to pcis and pcbddc */ 176 PC_IS* pcis = (PC_IS*)pc->data; 177 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 178 /* submatrices of local problem */ 179 Mat A_RV,A_VR,A_VV; 180 /* working matrices */ 181 Mat M1,M2,M3,C_CR; 182 /* working vectors */ 183 Vec vec1_C,vec2_C,vec1_V,vec2_V; 184 /* additional working stuff */ 185 IS is_aux; 186 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 187 const PetscScalar *array,*row_cmat_values; 188 const PetscInt *row_cmat_indices,*idx_R_local; 189 PetscInt *idx_V_B,*auxindices; 190 PetscInt n_vertices,n_constraints,size_of_constraint; 191 PetscInt i,j,n_R,n_D,n_B; 192 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 193 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 194 MatType impMatType; 195 /* some shortcuts to scalars */ 196 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 197 /* for debugging purposes */ 198 PetscReal *coarsefunctions_errors,*constraints_errors; 199 200 PetscFunctionBegin; 201 /* get number of vertices (corners plus constraints with change of basis) 202 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 203 n_vertices = pcbddc->n_actual_vertices; 204 n_constraints = pcbddc->local_primal_size-n_vertices; 205 /* Set Non-overlapping dimensions */ 206 n_B = pcis->n_B; n_D = pcis->n - n_B; 207 n_R = pcis->n-n_vertices; 208 209 /* Set types for local objects needed by BDDC precondtioner */ 210 impMatType = MATSEQDENSE; 211 212 /* Allocating some extra storage just to be safe */ 213 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 214 for (i=0;i<pcis->n;i++) auxindices[i]=i; 215 216 /* vertices in boundary numbering */ 217 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 218 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 219 if (i != n_vertices) { 220 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 221 } 222 223 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 224 if (n_constraints) { 225 /* see if we can save some allocations */ 226 if (pcbddc->local_auxmat2) { 227 PetscInt on_R,on_constraints; 228 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 229 if (on_R != n_R || on_constraints != n_constraints) { 230 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 231 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 232 } 233 } 234 /* work vectors */ 235 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 237 /* auxiliary matrices */ 238 if (!pcbddc->local_auxmat2) { 239 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 240 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 241 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 242 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 243 } 244 245 /* Extract constraints on R nodes: C_{CR} */ 246 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 247 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 248 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 249 250 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 251 for (i=0;i<n_constraints;i++) { 252 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 253 /* Get row of constraint matrix in R numbering */ 254 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 255 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 256 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 258 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 259 /* Solve for row of constraint matrix in R numbering */ 260 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 261 /* Set values in local_auxmat2 */ 262 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 263 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 264 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 } 266 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 267 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 269 270 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 271 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 272 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 273 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 274 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 275 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 276 ierr = MatSetUp(M1);CHKERRQ(ierr); 277 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 278 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 279 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 280 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 281 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 282 ierr = MatDestroy(&M2);CHKERRQ(ierr); 283 ierr = MatDestroy(&M3);CHKERRQ(ierr); 284 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 285 if (!pcbddc->local_auxmat1) { 286 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 287 } else { 288 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } 290 } 291 292 /* Get submatrices from subdomain matrix */ 293 if (n_vertices) { 294 PetscInt ibs,mbs; 295 PetscBool issbaij; 296 Mat newmat; 297 298 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 299 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 300 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 301 if (ibs != mbs) { /* need to convert to SEQAIJ */ 302 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 303 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 306 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 307 } else { 308 /* this is safe */ 309 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 310 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 311 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 312 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 313 /* which of the two approaches is faster? */ 314 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 315 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 316 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 318 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 319 } else { 320 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 322 } 323 } 324 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 325 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 326 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 327 } 328 329 /* Matrix of coarse basis functions (local) */ 330 if (pcbddc->coarse_phi_B) { 331 PetscInt on_B,on_primal; 332 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 333 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 334 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 335 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 336 } 337 } 338 if (pcbddc->coarse_phi_D) { 339 PetscInt on_D,on_primal; 340 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 341 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 342 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 343 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 344 } 345 } 346 if (!pcbddc->coarse_phi_B) { 347 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 348 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 349 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 350 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 } 352 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 } 358 359 if (pcbddc->dbg_flag) { 360 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 361 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 363 } 364 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 365 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 368 369 /* vertices */ 370 for (i=0;i<n_vertices;i++) { 371 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 372 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 373 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 374 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 376 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 377 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 378 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 379 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 381 if (n_constraints) { 382 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 383 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 385 } 386 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 387 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 388 389 /* Set values in coarse basis function and subdomain part of coarse_mat */ 390 /* coarse basis functions */ 391 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 392 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 393 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 395 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 398 if (pcbddc->switch_static || pcbddc->dbg_flag) { 399 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 402 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 403 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 } 405 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 406 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 407 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 if (n_constraints) { 410 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 } 414 415 /* check */ 416 if (pcbddc->dbg_flag) { 417 /* assemble subdomain vector on local nodes */ 418 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 419 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 420 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 424 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 425 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 426 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 427 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 428 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 if (n_constraints) { 431 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 } 435 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 438 /* check saddle point solution */ 439 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 440 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 442 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 443 /* shift by the identity matrix */ 444 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 445 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 446 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 448 } 449 } 450 451 /* constraints */ 452 for (i=0;i<n_constraints;i++) { 453 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 454 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 455 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 456 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 457 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 458 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 459 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 460 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 461 if (n_vertices) { 462 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 463 } 464 /* Set values in coarse basis function and subdomain part of coarse_mat */ 465 /* coarse basis functions */ 466 j = i+n_vertices; /* don't touch this! */ 467 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 468 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 469 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 471 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 472 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 if (pcbddc->switch_static || pcbddc->dbg_flag) { 474 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 } 480 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 481 if (n_vertices) { 482 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 483 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 } 486 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 487 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 490 if (pcbddc->dbg_flag) { 491 /* assemble subdomain vector on nodes */ 492 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 493 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 494 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 495 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 497 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 498 /* assemble subdomain vector of lagrange multipliers */ 499 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 500 if (n_vertices) { 501 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 502 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 } 505 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 506 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 509 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 511 /* check saddle point solution */ 512 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 513 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 515 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 516 /* shift by the identity matrix */ 517 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 521 } 522 } 523 /* call assembling routines for local coarse basis */ 524 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 525 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 if (pcbddc->switch_static || pcbddc->dbg_flag) { 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 } 530 531 /* compute other basis functions for non-symmetric problems */ 532 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 533 if (!setsym || (setsym && !issym)) { 534 if (!pcbddc->coarse_psi_B) { 535 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 536 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 537 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 538 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 } 540 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 541 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 542 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 543 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 544 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 } 546 for (i=0;i<pcbddc->local_primal_size;i++) { 547 if (n_constraints) { 548 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 549 for (j=0;j<n_constraints;j++) { 550 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 551 } 552 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 553 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 554 } 555 if (i<n_vertices) { 556 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 557 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 558 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 559 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 560 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 561 if (n_constraints) { 562 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 563 } 564 } else { 565 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 568 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 569 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 570 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 572 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 573 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 if (i<n_vertices) { 575 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 576 } 577 if (pcbddc->switch_static || pcbddc->dbg_flag) { 578 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 579 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 581 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 582 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 } 584 585 if (pcbddc->dbg_flag) { 586 /* assemble subdomain vector on nodes */ 587 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 588 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 589 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 590 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 if (i<n_vertices) { 592 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 593 } 594 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 595 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 596 /* assemble subdomain vector of lagrange multipliers */ 597 for (j=0;j<pcbddc->local_primal_size;j++) { 598 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 599 } 600 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 601 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 602 /* check saddle point solution */ 603 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 604 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 606 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 607 /* shift by the identity matrix */ 608 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 609 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 610 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 612 } 613 } 614 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 615 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 if (pcbddc->switch_static || pcbddc->dbg_flag) { 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 } 620 } 621 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 622 /* Checking coarse_sub_mat and coarse basis functios */ 623 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 624 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 625 if (pcbddc->dbg_flag) { 626 Mat coarse_sub_mat; 627 Mat AUXMAT,TM1,TM2,TM3,TM4; 628 Mat coarse_phi_D,coarse_phi_B; 629 Mat coarse_psi_D,coarse_psi_B; 630 Mat A_II,A_BB,A_IB,A_BI; 631 MatType checkmattype=MATSEQAIJ; 632 PetscReal real_value; 633 634 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 635 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 636 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 638 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 639 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 640 if (pcbddc->coarse_psi_B) { 641 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 642 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 643 } 644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 645 646 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 647 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 649 if (pcbddc->coarse_psi_B) { 650 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 651 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 652 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 653 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 654 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 655 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 656 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 657 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 658 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 659 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 660 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 661 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 662 } else { 663 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 665 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 } 672 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 673 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 674 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 676 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 678 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 683 for (i=0;i<pcbddc->local_primal_size;i++) { 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 685 } 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 687 for (i=0;i<pcbddc->local_primal_size;i++) { 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 689 } 690 if (pcbddc->coarse_psi_B) { 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 692 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 694 } 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 696 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 698 } 699 } 700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 701 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 702 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 705 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 706 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 709 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 710 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 711 if (pcbddc->coarse_psi_B) { 712 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 713 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 714 } 715 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 716 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 717 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 718 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 719 } 720 /* free memory */ 721 if (n_vertices) { 722 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 723 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 724 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 725 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 727 } 728 if (n_constraints) { 729 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 730 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 731 ierr = MatDestroy(&M1);CHKERRQ(ierr); 732 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 733 } 734 ierr = PetscFree(auxindices);CHKERRQ(ierr); 735 /* get back data */ 736 *coarse_submat_vals_n = coarse_submat_vals; 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 742 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 743 { 744 PC_IS* pcis = (PC_IS*)(pc->data); 745 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 746 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 747 PetscBool issbaij,isbaij; 748 /* manage repeated solves */ 749 MatReuse reuse; 750 MatStructure matstruct; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 755 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 756 } 757 /* get mat flags */ 758 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 759 reuse = MAT_INITIAL_MATRIX; 760 if (pc->setupcalled) { 761 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 762 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 763 if (matstruct == SAME_NONZERO_PATTERN) { 764 reuse = MAT_REUSE_MATRIX; 765 } else { 766 reuse = MAT_INITIAL_MATRIX; 767 } 768 } 769 if (reuse == MAT_INITIAL_MATRIX) { 770 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 771 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 773 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 774 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 775 } 776 777 /* transform local matrices if needed */ 778 if (pcbddc->use_change_of_basis) { 779 Mat change_mat_all; 780 PetscScalar *row_cmat_values; 781 PetscInt *row_cmat_indices; 782 PetscInt *nnz,*is_indices,*temp_indices; 783 PetscInt i,j,k,n_D,n_B; 784 785 /* Get Non-overlapping dimensions */ 786 n_B = pcis->n_B; 787 n_D = pcis->n-n_B; 788 789 /* compute nonzero structure of change of basis on all local nodes */ 790 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 791 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 792 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 793 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 794 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 795 k=1; 796 for (i=0;i<n_B;i++) { 797 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 798 nnz[is_indices[i]]=j; 799 if (k < j) k = j; 800 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 801 } 802 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 /* assemble change of basis matrix on the whole set of local dofs */ 804 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 805 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 806 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 807 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 808 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 809 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 810 for (i=0;i<n_D;i++) { 811 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 812 } 813 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 815 for (i=0;i<n_B;i++) { 816 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 817 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 818 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 819 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 824 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 825 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 826 if (!issbaij && !isbaij) { 827 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 828 } else { 829 Mat work_mat; 830 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 831 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 832 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 833 } 834 /* 835 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 836 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 837 */ 838 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 839 ierr = PetscFree(nnz);CHKERRQ(ierr); 840 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 841 } else { 842 /* without change of basis, the local matrix is unchanged */ 843 if (!pcbddc->local_mat) { 844 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 845 pcbddc->local_mat = matis->A; 846 } 847 } 848 849 /* get submatrices */ 850 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 851 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 852 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 853 if (!issbaij) { 854 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 856 } else { 857 Mat newmat; 858 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 859 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 861 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 862 } 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 868 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 869 { 870 PC_IS* pcis = (PC_IS*)(pc->data); 871 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 872 IS is_aux1,is_aux2; 873 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 874 PetscInt n_vertices,i,j,n_R,n_D,n_B; 875 PetscInt vbs,bs; 876 PetscBT bitmask; 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 /* No need to setup local scatters if primal space is unchanged */ 881 if (!pcbddc->new_primal_space_local) { 882 PetscFunctionReturn(0); 883 } 884 /* destroy old objects */ 885 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 886 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 887 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 888 /* Set Non-overlapping dimensions */ 889 n_B = pcis->n_B; n_D = pcis->n - n_B; 890 n_vertices = pcbddc->n_actual_vertices; 891 /* create auxiliary bitmask */ 892 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 893 for (i=0;i<n_vertices;i++) { 894 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 895 } 896 897 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 898 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 899 for (i=0, n_R=0; i<pcis->n; i++) { 900 if (!PetscBTLookup(bitmask,i)) { 901 idx_R_local[n_R] = i; 902 n_R++; 903 } 904 } 905 906 /* Block code */ 907 vbs = 1; 908 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 909 if (bs>1 && !(n_vertices%bs)) { 910 PetscBool is_blocked = PETSC_TRUE; 911 PetscInt *vary; 912 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 913 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 914 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 915 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 916 for (i=0; i<n_vertices; i++) { 917 if (vary[i]!=0 && vary[i]!=bs) { 918 is_blocked = PETSC_FALSE; 919 break; 920 } 921 } 922 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 923 vbs = bs; 924 for (i=0;i<n_R/vbs;i++) { 925 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 926 } 927 } 928 ierr = PetscFree(vary);CHKERRQ(ierr); 929 } 930 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 931 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 932 933 /* print some info if requested */ 934 if (pcbddc->dbg_flag) { 935 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 936 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 937 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 943 } 944 945 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 946 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 947 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 948 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 949 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 950 for (i=0; i<n_D; i++) { 951 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 952 } 953 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 954 for (i=0, j=0; i<n_R; i++) { 955 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 956 aux_array1[j++] = i; 957 } 958 } 959 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 960 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 961 for (i=0, j=0; i<n_B; i++) { 962 if (!PetscBTLookup(bitmask,is_indices[i])) { 963 aux_array2[j++] = i; 964 } 965 } 966 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 968 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 969 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 970 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 971 972 if (pcbddc->switch_static || pcbddc->dbg_flag) { 973 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 974 for (i=0, j=0; i<n_R; i++) { 975 if (PetscBTLookup(bitmask,idx_R_local[i])) { 976 aux_array1[j++] = i; 977 } 978 } 979 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 980 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 981 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 982 } 983 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 984 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } 987 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 991 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 992 { 993 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 994 PC_IS *pcis = (PC_IS*)pc->data; 995 PC pc_temp; 996 Mat A_RR; 997 MatStructure matstruct; 998 MatReuse reuse; 999 PetscScalar m_one = -1.0; 1000 PetscReal value; 1001 PetscInt n_D,n_R,ibs,mbs; 1002 PetscBool use_exact,use_exact_reduced,issbaij; 1003 PetscErrorCode ierr; 1004 /* prefixes stuff */ 1005 char dir_prefix[256],neu_prefix[256],str_level[3]; 1006 size_t len; 1007 1008 PetscFunctionBegin; 1009 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1010 1011 /* compute prefixes */ 1012 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1013 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1014 if (!pcbddc->current_level) { 1015 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1016 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1017 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1018 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1019 } else { 1020 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1021 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1022 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1023 len -= 15; /* remove "pc_bddc_coarse_" */ 1024 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1025 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1026 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1027 *(dir_prefix+len)='\0'; 1028 *(neu_prefix+len)='\0'; 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1032 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1033 } 1034 1035 /* DIRICHLET PROBLEM */ 1036 /* Matrix for Dirichlet problem is pcis->A_II */ 1037 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1038 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1039 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1040 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1041 /* default */ 1042 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1043 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1044 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1045 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1046 if (issbaij) { 1047 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1048 } else { 1049 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1050 } 1051 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1052 } 1053 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1054 /* Allow user's customization */ 1055 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1056 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1057 if (!n_D) { 1058 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1059 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1060 } 1061 /* Set Up KSP for Dirichlet problem of BDDC */ 1062 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1063 /* set ksp_D into pcis data */ 1064 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1065 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1066 pcis->ksp_D = pcbddc->ksp_D; 1067 1068 /* NEUMANN PROBLEM */ 1069 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1070 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1071 if (pcbddc->ksp_R) { /* already created ksp */ 1072 PetscInt nn_R; 1073 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1074 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1075 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1076 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1077 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1078 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1079 reuse = MAT_INITIAL_MATRIX; 1080 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1081 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1082 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1083 reuse = MAT_INITIAL_MATRIX; 1084 } else { /* safe to reuse the matrix */ 1085 reuse = MAT_REUSE_MATRIX; 1086 } 1087 } 1088 /* last check */ 1089 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1091 reuse = MAT_INITIAL_MATRIX; 1092 } 1093 } else { /* first time, so we need to create the matrix */ 1094 reuse = MAT_INITIAL_MATRIX; 1095 } 1096 /* extract A_RR */ 1097 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1098 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1099 if (ibs != mbs) { 1100 Mat newmat; 1101 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1102 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1103 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1104 } else { 1105 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1106 } 1107 if (!pcbddc->ksp_R) { /* create object if not present */ 1108 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1109 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1110 /* default */ 1111 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1112 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1113 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1114 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1115 if (issbaij) { 1116 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1117 } else { 1118 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1119 } 1120 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1121 } 1122 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1123 /* Allow user's customization */ 1124 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1125 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1126 if (!n_R) { 1127 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1128 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1129 } 1130 /* Set Up KSP for Neumann problem of BDDC */ 1131 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1132 1133 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1134 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1135 /* Dirichlet */ 1136 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1137 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1138 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1139 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1140 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1141 /* need to be adapted? */ 1142 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1143 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1144 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1145 /* print info */ 1146 if (pcbddc->dbg_flag) { 1147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1148 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1153 } 1154 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1155 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1156 } 1157 1158 /* Neumann */ 1159 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1160 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1161 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1162 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1164 /* need to be adapted? */ 1165 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1166 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1167 /* print info */ 1168 if (pcbddc->dbg_flag) { 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1171 } 1172 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1174 } 1175 } 1176 /* free Neumann problem's matrix */ 1177 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1183 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1184 { 1185 PetscErrorCode ierr; 1186 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1187 1188 PetscFunctionBegin; 1189 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1190 if (pcbddc->local_auxmat1) { 1191 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1192 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1199 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1200 { 1201 PetscErrorCode ierr; 1202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1203 PC_IS* pcis = (PC_IS*) (pc->data); 1204 const PetscScalar zero = 0.0; 1205 1206 PetscFunctionBegin; 1207 /* Application of PHI^T (or PSI^T) */ 1208 if (pcbddc->coarse_psi_B) { 1209 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1210 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1211 } else { 1212 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1213 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1214 } 1215 /* Scatter data of coarse_rhs */ 1216 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1217 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1218 1219 /* Local solution on R nodes */ 1220 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1221 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1222 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1223 if (pcbddc->switch_static) { 1224 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 } 1227 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1228 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 if (pcbddc->switch_static) { 1232 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 } 1235 1236 /* Coarse solution */ 1237 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1239 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1240 } 1241 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1242 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1243 1244 /* Sum contributions from two levels */ 1245 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1246 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1252 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1253 { 1254 PetscErrorCode ierr; 1255 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1256 1257 PetscFunctionBegin; 1258 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1264 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1265 { 1266 PetscErrorCode ierr; 1267 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1268 1269 PetscFunctionBegin; 1270 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 /* uncomment for testing purposes */ 1275 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1278 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1279 { 1280 PetscErrorCode ierr; 1281 PC_IS* pcis = (PC_IS*)(pc->data); 1282 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1283 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1284 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1285 MatType impMatType=MATSEQAIJ; 1286 /* one and zero */ 1287 PetscScalar one=1.0,zero=0.0; 1288 /* space to store constraints and their local indices */ 1289 PetscScalar *temp_quadrature_constraint; 1290 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1291 /* iterators */ 1292 PetscInt i,j,k,total_counts,temp_start_ptr; 1293 /* stuff to store connected components stored in pcbddc->mat_graph */ 1294 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1295 PetscInt n_ISForFaces,n_ISForEdges; 1296 /* near null space stuff */ 1297 MatNullSpace nearnullsp; 1298 const Vec *nearnullvecs; 1299 Vec *localnearnullsp; 1300 PetscBool nnsp_has_cnst; 1301 PetscInt nnsp_size; 1302 PetscScalar *array; 1303 /* BLAS integers */ 1304 PetscBLASInt lwork,lierr; 1305 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1306 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1307 /* LAPACK working arrays for SVD or POD */ 1308 PetscBool skip_lapack; 1309 PetscScalar *work; 1310 PetscReal *singular_vals; 1311 #if defined(PETSC_USE_COMPLEX) 1312 PetscReal *rwork; 1313 #endif 1314 #if defined(PETSC_MISSING_LAPACK_GESVD) 1315 PetscBLASInt Blas_one_2=1; 1316 PetscScalar *temp_basis,*correlation_mat; 1317 #else 1318 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1319 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1320 #endif 1321 /* reuse */ 1322 PetscInt olocal_primal_size; 1323 PetscInt *oprimal_indices_local_idxs; 1324 /* change of basis */ 1325 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1326 PetscBool boolforchange,qr_needed; 1327 PetscBT touched,change_basis,qr_needed_idx; 1328 /* auxiliary stuff */ 1329 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1330 /* some quantities */ 1331 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1332 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1333 1334 1335 PetscFunctionBegin; 1336 /* Destroy Mat objects computed previously */ 1337 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1338 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1339 /* Get index sets for faces, edges and vertices from graph */ 1340 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1341 pcbddc->use_vertices = PETSC_TRUE; 1342 } 1343 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1344 /* HACK: provide functions to set change of basis */ 1345 if (!ISForVertices && pcbddc->NullSpace) { 1346 pcbddc->use_change_of_basis = PETSC_TRUE; 1347 pcbddc->use_change_on_faces = PETSC_FALSE; 1348 } 1349 /* print some info */ 1350 if (pcbddc->dbg_flag) { 1351 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1353 i = 0; 1354 if (ISForVertices) { 1355 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1356 } 1357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1360 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1361 } 1362 /* check if near null space is attached to global mat */ 1363 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1364 if (nearnullsp) { 1365 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1366 /* remove any stored info */ 1367 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1368 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1369 /* store information for BDDC solver reuse */ 1370 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1371 pcbddc->onearnullspace = nearnullsp; 1372 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1373 for (i=0;i<nnsp_size;i++) { 1374 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1375 } 1376 } else { /* if near null space is not provided BDDC uses constants by default */ 1377 nnsp_size = 0; 1378 nnsp_has_cnst = PETSC_TRUE; 1379 } 1380 /* get max number of constraints on a single cc */ 1381 max_constraints = nnsp_size; 1382 if (nnsp_has_cnst) max_constraints++; 1383 1384 /* 1385 Evaluate maximum storage size needed by the procedure 1386 - temp_indices will contain start index of each constraint stored as follows 1387 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1388 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1389 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1390 */ 1391 total_counts = n_ISForFaces+n_ISForEdges; 1392 total_counts *= max_constraints; 1393 n_vertices = 0; 1394 if (ISForVertices) { 1395 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1396 } 1397 total_counts += n_vertices; 1398 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1399 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1400 total_counts = 0; 1401 max_size_of_constraint = 0; 1402 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1403 if (i<n_ISForEdges) { 1404 used_IS = &ISForEdges[i]; 1405 } else { 1406 used_IS = &ISForFaces[i-n_ISForEdges]; 1407 } 1408 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1409 total_counts += j; 1410 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1411 } 1412 total_counts *= max_constraints; 1413 total_counts += n_vertices; 1414 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1415 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1416 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1417 /* get local part of global near null space vectors */ 1418 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1419 for (k=0;k<nnsp_size;k++) { 1420 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1421 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1422 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1423 } 1424 1425 /* whether or not to skip lapack calls */ 1426 skip_lapack = PETSC_TRUE; 1427 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1428 1429 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1430 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1431 PetscScalar temp_work; 1432 #if defined(PETSC_MISSING_LAPACK_GESVD) 1433 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1434 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1435 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1436 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1437 #if defined(PETSC_USE_COMPLEX) 1438 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1439 #endif 1440 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1441 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1442 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1443 lwork = -1; 1444 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1445 #if !defined(PETSC_USE_COMPLEX) 1446 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1447 #else 1448 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1449 #endif 1450 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1451 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1452 #else /* on missing GESVD */ 1453 /* SVD */ 1454 PetscInt max_n,min_n; 1455 max_n = max_size_of_constraint; 1456 min_n = max_constraints; 1457 if (max_size_of_constraint < max_constraints) { 1458 min_n = max_size_of_constraint; 1459 max_n = max_constraints; 1460 } 1461 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1462 #if defined(PETSC_USE_COMPLEX) 1463 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1464 #endif 1465 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1466 lwork = -1; 1467 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1468 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1469 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1470 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1471 #if !defined(PETSC_USE_COMPLEX) 1472 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1473 #else 1474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1475 #endif 1476 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1478 #endif /* on missing GESVD */ 1479 /* Allocate optimal workspace */ 1480 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1481 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1482 } 1483 /* Now we can loop on constraining sets */ 1484 total_counts = 0; 1485 temp_indices[0] = 0; 1486 /* vertices */ 1487 if (ISForVertices) { 1488 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1489 if (nnsp_has_cnst) { /* consider all vertices */ 1490 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1491 for (i=0;i<n_vertices;i++) { 1492 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1493 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1494 total_counts++; 1495 } 1496 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1497 PetscBool used_vertex; 1498 for (i=0;i<n_vertices;i++) { 1499 used_vertex = PETSC_FALSE; 1500 k = 0; 1501 while (!used_vertex && k<nnsp_size) { 1502 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1503 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1504 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1505 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1506 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1507 total_counts++; 1508 used_vertex = PETSC_TRUE; 1509 } 1510 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1511 k++; 1512 } 1513 } 1514 } 1515 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1516 n_vertices = total_counts; 1517 } 1518 1519 /* edges and faces */ 1520 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1521 if (i<n_ISForEdges) { 1522 used_IS = &ISForEdges[i]; 1523 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1524 } else { 1525 used_IS = &ISForFaces[i-n_ISForEdges]; 1526 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1527 } 1528 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1529 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1530 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1531 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1532 /* change of basis should not be performed on local periodic nodes */ 1533 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1534 if (nnsp_has_cnst) { 1535 PetscScalar quad_value; 1536 temp_constraints++; 1537 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1538 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1539 for (j=0;j<size_of_constraint;j++) { 1540 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1541 } 1542 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1543 total_counts++; 1544 } 1545 for (k=0;k<nnsp_size;k++) { 1546 PetscReal real_value; 1547 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1548 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1549 for (j=0;j<size_of_constraint;j++) { 1550 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1551 } 1552 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1553 /* check if array is null on the connected component */ 1554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1555 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1556 if (real_value > 0.0) { /* keep indices and values */ 1557 temp_constraints++; 1558 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1559 total_counts++; 1560 } 1561 } 1562 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1563 valid_constraints = temp_constraints; 1564 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1565 if (!pcbddc->use_nnsp_true && temp_constraints) { 1566 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1567 1568 #if defined(PETSC_MISSING_LAPACK_GESVD) 1569 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1570 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1571 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1572 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1573 from that computed using LAPACKgesvd 1574 -> This is due to a different computation of eigenvectors in LAPACKheev 1575 -> The quality of the POD-computed basis will be the same */ 1576 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1577 /* Store upper triangular part of correlation matrix */ 1578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1579 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1580 for (j=0;j<temp_constraints;j++) { 1581 for (k=0;k<j+1;k++) { 1582 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1583 } 1584 } 1585 /* compute eigenvalues and eigenvectors of correlation matrix */ 1586 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1587 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1588 #if !defined(PETSC_USE_COMPLEX) 1589 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1590 #else 1591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1592 #endif 1593 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1594 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1595 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1596 j=0; 1597 while (j < temp_constraints && singular_vals[j] < tol) j++; 1598 total_counts=total_counts-j; 1599 valid_constraints = temp_constraints-j; 1600 /* scale and copy POD basis into used quadrature memory */ 1601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1602 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1603 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1607 if (j<temp_constraints) { 1608 PetscInt ii; 1609 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1610 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1611 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1612 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1613 for (k=0;k<temp_constraints-j;k++) { 1614 for (ii=0;ii<size_of_constraint;ii++) { 1615 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1616 } 1617 } 1618 } 1619 #else /* on missing GESVD */ 1620 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1621 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1623 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1624 #if !defined(PETSC_USE_COMPLEX) 1625 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1626 #else 1627 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1628 #endif 1629 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1630 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1631 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1632 k = temp_constraints; 1633 if (k > size_of_constraint) k = size_of_constraint; 1634 j = 0; 1635 while (j < k && singular_vals[k-j-1] < tol) j++; 1636 total_counts = total_counts-temp_constraints+k-j; 1637 valid_constraints = k-j; 1638 #endif /* on missing GESVD */ 1639 } 1640 /* setting change_of_basis flag is safe now */ 1641 if (boolforchange) { 1642 for (j=0;j<valid_constraints;j++) { 1643 PetscBTSet(change_basis,total_counts-j-1); 1644 } 1645 } 1646 } 1647 /* free index sets of faces, edges and vertices */ 1648 for (i=0;i<n_ISForFaces;i++) { 1649 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1650 } 1651 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1652 for (i=0;i<n_ISForEdges;i++) { 1653 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1654 } 1655 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1656 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1657 /* map temp_indices_to_constraint in boundary numbering */ 1658 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1659 if (i != temp_indices[total_counts]) { 1660 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1661 } 1662 1663 /* free workspace */ 1664 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1665 ierr = PetscFree(work);CHKERRQ(ierr); 1666 #if defined(PETSC_USE_COMPLEX) 1667 ierr = PetscFree(rwork);CHKERRQ(ierr); 1668 #endif 1669 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1670 #if defined(PETSC_MISSING_LAPACK_GESVD) 1671 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1672 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1673 #endif 1674 } 1675 for (k=0;k<nnsp_size;k++) { 1676 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1677 } 1678 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1679 1680 /* set quantities in pcbddc data structure and store previous primal size */ 1681 /* n_vertices defines the number of subdomain corners in the primal space */ 1682 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1683 olocal_primal_size = pcbddc->local_primal_size; 1684 pcbddc->local_primal_size = total_counts; 1685 pcbddc->n_vertices = n_vertices; 1686 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1687 1688 /* Create constraint matrix */ 1689 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1690 /* so we need to set it up properly either with or without change of basis */ 1691 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1692 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1693 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1694 /* array to compute a local numbering of constraints : vertices first then constraints */ 1695 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1696 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1697 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1698 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1699 /* auxiliary stuff for basis change */ 1700 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1701 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1702 1703 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1704 total_primal_vertices=0; 1705 for (i=0;i<pcbddc->local_primal_size;i++) { 1706 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1707 if (size_of_constraint == 1) { 1708 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1709 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1710 aux_primal_minloc[total_primal_vertices]=0; 1711 total_primal_vertices++; 1712 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1713 PetscInt min_loc,min_index; 1714 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1715 /* find first untouched local node */ 1716 k = 0; 1717 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1718 min_index = global_indices[k]; 1719 min_loc = k; 1720 /* search the minimum among global nodes already untouched on the cc */ 1721 for (k=1;k<size_of_constraint;k++) { 1722 /* there can be more than one constraint on a single connected component */ 1723 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1724 min_index = global_indices[k]; 1725 min_loc = k; 1726 } 1727 } 1728 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1729 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1730 aux_primal_minloc[total_primal_vertices]=min_loc; 1731 total_primal_vertices++; 1732 } 1733 } 1734 /* determine if a QR strategy is needed for change of basis */ 1735 qr_needed = PETSC_FALSE; 1736 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1737 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1738 if (PetscBTLookup(change_basis,i)) { 1739 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1740 j = 0; 1741 for (k=0;k<size_of_constraint;k++) { 1742 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1743 j++; 1744 } 1745 } 1746 /* found more than one primal dof on the cc */ 1747 if (j > 1) { 1748 PetscBTSet(qr_needed_idx,i); 1749 qr_needed = PETSC_TRUE; 1750 } 1751 } 1752 } 1753 /* free workspace */ 1754 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1755 1756 /* permute indices in order to have a sorted set of vertices */ 1757 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1758 1759 /* nonzero structure of constraint matrix */ 1760 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1761 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1762 j=total_primal_vertices; 1763 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1764 if (!PetscBTLookup(change_basis,i)) { 1765 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1766 j++; 1767 } 1768 } 1769 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1770 ierr = PetscFree(nnz);CHKERRQ(ierr); 1771 /* set values in constraint matrix */ 1772 for (i=0;i<total_primal_vertices;i++) { 1773 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1774 } 1775 total_counts = total_primal_vertices; 1776 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1777 if (!PetscBTLookup(change_basis,i)) { 1778 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1779 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1780 total_counts++; 1781 } 1782 } 1783 /* assembling */ 1784 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1785 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 /* 1787 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1788 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1789 */ 1790 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1791 if (pcbddc->use_change_of_basis) { 1792 /* dual and primal dofs on a single cc */ 1793 PetscInt dual_dofs,primal_dofs; 1794 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1795 PetscInt primal_counter; 1796 /* working stuff for GEQRF */ 1797 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1798 PetscBLASInt lqr_work; 1799 /* working stuff for UNGQR */ 1800 PetscScalar *gqr_work,lgqr_work_t; 1801 PetscBLASInt lgqr_work; 1802 /* working stuff for TRTRS */ 1803 PetscScalar *trs_rhs; 1804 PetscBLASInt Blas_NRHS; 1805 /* pointers for values insertion into change of basis matrix */ 1806 PetscInt *start_rows,*start_cols; 1807 PetscScalar *start_vals; 1808 /* working stuff for values insertion */ 1809 PetscBT is_primal; 1810 1811 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1812 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1813 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1814 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1815 /* work arrays */ 1816 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1817 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1818 /* nonzeros per row */ 1819 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1820 if (PetscBTLookup(change_basis,i)) { 1821 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1822 if (PetscBTLookup(qr_needed_idx,i)) { 1823 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1824 } else { 1825 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1826 /* get local primal index on the cc */ 1827 j = 0; 1828 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1829 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1830 } 1831 } 1832 } 1833 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1834 ierr = PetscFree(nnz);CHKERRQ(ierr); 1835 /* Set initial identity in the matrix */ 1836 for (i=0;i<pcis->n_B;i++) { 1837 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1838 } 1839 1840 if (pcbddc->dbg_flag) { 1841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1843 } 1844 1845 1846 /* Now we loop on the constraints which need a change of basis */ 1847 /* 1848 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1849 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1850 1851 Basic blocks of change of basis matrix T computed 1852 1853 - Using the following block transformation if there is only a primal dof on the cc 1854 (in the example, primal dof is the last one of the edge in LOCAL ordering 1855 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1856 | 1 0 ... 0 1 | 1857 | 0 1 ... 0 1 | 1858 | ... | 1859 | 0 ... 1 1 | 1860 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1861 1862 - via QR decomposition of constraints otherwise 1863 */ 1864 if (qr_needed) { 1865 /* space to store Q */ 1866 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1867 /* first we issue queries for optimal work */ 1868 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1871 lqr_work = -1; 1872 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1873 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1874 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1875 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1876 lgqr_work = -1; 1877 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1878 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1879 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1881 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1882 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1883 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1884 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1885 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1886 /* array to store scaling factors for reflectors */ 1887 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1888 /* array to store rhs and solution of triangular solver */ 1889 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1890 /* allocating workspace for check */ 1891 if (pcbddc->dbg_flag) { 1892 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1893 } 1894 } 1895 /* array to store whether a node is primal or not */ 1896 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1897 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1898 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1899 if (i != total_primal_vertices) { 1900 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1901 } 1902 for (i=0;i<total_primal_vertices;i++) { 1903 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1904 } 1905 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1906 1907 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1908 /* -> using implicit ordering contained in temp_indices data */ 1909 total_counts = pcbddc->n_vertices; 1910 primal_counter = total_counts; 1911 while (total_counts<pcbddc->local_primal_size) { 1912 primal_dofs = 1; 1913 if (PetscBTLookup(change_basis,total_counts)) { 1914 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1915 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1916 primal_dofs++; 1917 } 1918 /* get constraint info */ 1919 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1920 dual_dofs = size_of_constraint-primal_dofs; 1921 1922 if (pcbddc->dbg_flag) { 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1924 } 1925 1926 if (primal_dofs > 1) { /* QR */ 1927 1928 /* copy quadrature constraints for change of basis check */ 1929 if (pcbddc->dbg_flag) { 1930 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1931 } 1932 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1933 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1934 1935 /* compute QR decomposition of constraints */ 1936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1937 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1940 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1941 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1942 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1943 1944 /* explictly compute R^-T */ 1945 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1946 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1947 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1948 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1949 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1951 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1952 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1954 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1955 1956 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1961 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1962 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1963 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1964 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1965 1966 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1967 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1968 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1970 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1975 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1976 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1977 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1978 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1979 1980 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1981 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1982 /* insert cols for primal dofs */ 1983 for (j=0;j<primal_dofs;j++) { 1984 start_vals = &qr_basis[j*size_of_constraint]; 1985 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1986 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1987 } 1988 /* insert cols for dual dofs */ 1989 for (j=0,k=0;j<dual_dofs;k++) { 1990 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1991 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1992 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1993 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1994 j++; 1995 } 1996 } 1997 1998 /* check change of basis */ 1999 if (pcbddc->dbg_flag) { 2000 PetscInt ii,jj; 2001 PetscBool valid_qr=PETSC_TRUE; 2002 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2008 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2009 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2011 for (jj=0;jj<size_of_constraint;jj++) { 2012 for (ii=0;ii<primal_dofs;ii++) { 2013 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2014 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2015 } 2016 } 2017 if (!valid_qr) { 2018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2019 for (jj=0;jj<size_of_constraint;jj++) { 2020 for (ii=0;ii<primal_dofs;ii++) { 2021 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2022 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2023 } 2024 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2025 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2026 } 2027 } 2028 } 2029 } else { 2030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2031 } 2032 } 2033 } else { /* simple transformation block */ 2034 PetscInt row,col; 2035 PetscScalar val; 2036 for (j=0;j<size_of_constraint;j++) { 2037 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2038 if (!PetscBTLookup(is_primal,row)) { 2039 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2040 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2041 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2042 } else { 2043 for (k=0;k<size_of_constraint;k++) { 2044 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2045 if (row != col) { 2046 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2047 } else { 2048 val = 1.0; 2049 } 2050 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } 2053 } 2054 } 2055 /* increment primal counter */ 2056 primal_counter += primal_dofs; 2057 } else { 2058 if (pcbddc->dbg_flag) { 2059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2060 } 2061 } 2062 /* increment constraint counter total_counts */ 2063 total_counts += primal_dofs; 2064 } 2065 2066 /* free workspace */ 2067 if (qr_needed) { 2068 if (pcbddc->dbg_flag) { 2069 ierr = PetscFree(work);CHKERRQ(ierr); 2070 } 2071 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2072 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2073 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2074 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2075 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2076 } 2077 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2078 /* assembling */ 2079 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 /* 2082 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2083 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2084 */ 2085 } 2086 2087 /* get indices in local ordering for vertices and constraints */ 2088 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2089 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2090 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2093 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2094 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2095 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2096 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2097 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2098 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2099 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2100 /* set quantities in PCBDDC data struct */ 2101 pcbddc->n_actual_vertices = i; 2102 /* check if a new primal space has been introduced */ 2103 pcbddc->new_primal_space_local = PETSC_TRUE; 2104 if (olocal_primal_size == pcbddc->local_primal_size) { 2105 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2106 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2107 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2108 } 2109 /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */ 2110 if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE; 2111 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2112 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2113 2114 /* flush dbg viewer */ 2115 if (pcbddc->dbg_flag) { 2116 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2117 } 2118 2119 /* free workspace */ 2120 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2121 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2122 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2123 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2124 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2125 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2126 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2127 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2133 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2134 { 2135 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2136 PC_IS *pcis = (PC_IS*)pc->data; 2137 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2138 PetscInt ierr,i,vertex_size; 2139 PetscViewer viewer=pcbddc->dbg_viewer; 2140 2141 PetscFunctionBegin; 2142 /* Reset previously computed graph */ 2143 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2144 /* Init local Graph struct */ 2145 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2146 2147 /* Check validity of the csr graph passed in by the user */ 2148 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2149 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2150 } 2151 2152 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2153 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2154 Mat mat_adj; 2155 const PetscInt *xadj,*adjncy; 2156 PetscBool flg_row=PETSC_TRUE; 2157 2158 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2159 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2160 if (!flg_row) { 2161 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2162 } 2163 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2164 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2165 if (!flg_row) { 2166 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2167 } 2168 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2169 } 2170 2171 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2172 vertex_size = 1; 2173 if (pcbddc->user_provided_isfordofs) { 2174 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2175 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2176 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2177 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2178 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2179 } 2180 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2181 pcbddc->n_ISForDofs = 0; 2182 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2183 } 2184 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2185 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2186 } else { 2187 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2188 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2189 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2190 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2191 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2192 } 2193 } 2194 } 2195 2196 /* Setup of Graph */ 2197 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2198 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2199 } 2200 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2201 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2202 } 2203 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2204 2205 /* Graph's connected components analysis */ 2206 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2207 2208 /* print some info to stdout */ 2209 if (pcbddc->dbg_flag) { 2210 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2211 } 2212 2213 /* mark topography has done */ 2214 pcbddc->recompute_topography = PETSC_FALSE; 2215 PetscFunctionReturn(0); 2216 } 2217 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2220 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2221 { 2222 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2223 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2224 PetscErrorCode ierr; 2225 2226 PetscFunctionBegin; 2227 n = 0; 2228 vertices = 0; 2229 if (pcbddc->ConstraintMatrix) { 2230 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2231 for (i=0;i<local_primal_size;i++) { 2232 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2233 if (size_of_constraint == 1) n++; 2234 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2235 } 2236 if (vertices_idx) { 2237 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2238 n = 0; 2239 for (i=0;i<local_primal_size;i++) { 2240 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2241 if (size_of_constraint == 1) { 2242 vertices[n++]=row_cmat_indices[0]; 2243 } 2244 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2245 } 2246 } 2247 } 2248 *n_vertices = n; 2249 if (vertices_idx) *vertices_idx = vertices; 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2255 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2256 { 2257 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2258 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2259 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2260 PetscBT touched; 2261 PetscErrorCode ierr; 2262 2263 /* This function assumes that the number of local constraints per connected component 2264 is not greater than the number of nodes defined for the connected component 2265 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2266 PetscFunctionBegin; 2267 n = 0; 2268 constraints_index = 0; 2269 if (pcbddc->ConstraintMatrix) { 2270 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2271 max_size_of_constraint = 0; 2272 for (i=0;i<local_primal_size;i++) { 2273 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2274 if (size_of_constraint > 1) { 2275 n++; 2276 } 2277 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2278 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2279 } 2280 if (constraints_idx) { 2281 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2282 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2283 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2284 n = 0; 2285 for (i=0;i<local_primal_size;i++) { 2286 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2287 if (size_of_constraint > 1) { 2288 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2289 /* find first untouched local node */ 2290 j = 0; 2291 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2292 min_index = row_cmat_global_indices[j]; 2293 min_loc = j; 2294 /* search the minimum among nodes not yet touched on the connected component 2295 since there can be more than one constraint on a single cc */ 2296 for (j=1;j<size_of_constraint;j++) { 2297 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2298 min_index = row_cmat_global_indices[j]; 2299 min_loc = j; 2300 } 2301 } 2302 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2303 constraints_index[n++] = row_cmat_indices[min_loc]; 2304 } 2305 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2306 } 2307 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2308 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2309 } 2310 } 2311 *n_constraints = n; 2312 if (constraints_idx) *constraints_idx = constraints_index; 2313 PetscFunctionReturn(0); 2314 } 2315 2316 /* the next two functions has been adapted from pcis.c */ 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "PCBDDCApplySchur" 2319 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2320 { 2321 PetscErrorCode ierr; 2322 PC_IS *pcis = (PC_IS*)(pc->data); 2323 2324 PetscFunctionBegin; 2325 if (!vec2_B) { vec2_B = v; } 2326 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2327 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2328 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2329 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2330 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2336 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2337 { 2338 PetscErrorCode ierr; 2339 PC_IS *pcis = (PC_IS*)(pc->data); 2340 2341 PetscFunctionBegin; 2342 if (!vec2_B) { vec2_B = v; } 2343 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2344 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2345 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2346 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2347 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "PCBDDCSubsetNumbering" 2353 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2354 { 2355 Vec local_vec,global_vec; 2356 IS seqis,paris; 2357 VecScatter scatter_ctx; 2358 PetscScalar *array; 2359 PetscInt *temp_global_dofs; 2360 PetscScalar globalsum; 2361 PetscInt i,j,s; 2362 PetscInt nlocals,first_index,old_index,max_local; 2363 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2364 PetscMPIInt *dof_sizes,*dof_displs; 2365 PetscBool first_found; 2366 PetscErrorCode ierr; 2367 2368 PetscFunctionBegin; 2369 /* mpi buffers */ 2370 MPI_Comm_size(comm,&size_prec_comm); 2371 MPI_Comm_rank(comm,&rank_prec_comm); 2372 j = ( !rank_prec_comm ? size_prec_comm : 0); 2373 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2374 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2375 /* get maximum size of subset */ 2376 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2377 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2378 max_local = 0; 2379 if (n_local_dofs) { 2380 max_local = temp_global_dofs[0]; 2381 for (i=1;i<n_local_dofs;i++) { 2382 if (max_local < temp_global_dofs[i] ) { 2383 max_local = temp_global_dofs[i]; 2384 } 2385 } 2386 } 2387 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2388 max_global++; 2389 max_local = 0; 2390 if (n_local_dofs) { 2391 max_local = local_dofs[0]; 2392 for (i=1;i<n_local_dofs;i++) { 2393 if (max_local < local_dofs[i] ) { 2394 max_local = local_dofs[i]; 2395 } 2396 } 2397 } 2398 max_local++; 2399 /* allocate workspace */ 2400 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2401 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2402 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2403 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2404 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2405 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2406 /* create scatter */ 2407 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2408 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2409 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2410 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2411 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2412 /* init array */ 2413 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2414 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2415 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2416 if (local_dofs_mult) { 2417 for (i=0;i<n_local_dofs;i++) { 2418 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2419 } 2420 } else { 2421 for (i=0;i<n_local_dofs;i++) { 2422 array[local_dofs[i]]=1.0; 2423 } 2424 } 2425 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2426 /* scatter into global vec and get total number of global dofs */ 2427 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2428 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2430 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2431 /* Fill global_vec with cumulative function for global numbering */ 2432 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2433 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2434 nlocals = 0; 2435 first_index = -1; 2436 first_found = PETSC_FALSE; 2437 for (i=0;i<s;i++) { 2438 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2439 first_found = PETSC_TRUE; 2440 first_index = i; 2441 } 2442 nlocals += (PetscInt)PetscRealPart(array[i]); 2443 } 2444 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2445 if (!rank_prec_comm) { 2446 dof_displs[0]=0; 2447 for (i=1;i<size_prec_comm;i++) { 2448 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2449 } 2450 } 2451 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2452 if (first_found) { 2453 array[first_index] += (PetscScalar)nlocals; 2454 old_index = first_index; 2455 for (i=first_index+1;i<s;i++) { 2456 if (PetscRealPart(array[i]) > 0.0) { 2457 array[i] += array[old_index]; 2458 old_index = i; 2459 } 2460 } 2461 } 2462 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2463 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2464 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 /* get global ordering of local dofs */ 2467 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2468 if (local_dofs_mult) { 2469 for (i=0;i<n_local_dofs;i++) { 2470 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2471 } 2472 } else { 2473 for (i=0;i<n_local_dofs;i++) { 2474 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2475 } 2476 } 2477 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2478 /* free workspace */ 2479 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2480 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2481 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2482 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2483 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2484 /* return pointer to global ordering of local dofs */ 2485 *global_numbering_subset = temp_global_dofs; 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2491 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2492 { 2493 PetscInt i,j; 2494 PetscScalar *alphas; 2495 PetscErrorCode ierr; 2496 2497 PetscFunctionBegin; 2498 /* this implements stabilized Gram-Schmidt */ 2499 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2500 for (i=0;i<n;i++) { 2501 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2502 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2503 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2504 } 2505 ierr = PetscFree(alphas);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 /* TODO 2510 - now preallocation is done assuming SEQDENSE local matrices 2511 */ 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatISGetMPIXAIJ" 2514 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2515 { 2516 Mat new_mat; 2517 Mat_IS *matis = (Mat_IS*)(mat->data); 2518 /* info on mat */ 2519 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2520 PetscInt bs,rows,cols; 2521 PetscInt lrows,lcols; 2522 PetscInt local_rows,local_cols; 2523 PetscBool isdense; 2524 /* values insertion */ 2525 PetscScalar *array; 2526 PetscInt *local_indices,*global_indices; 2527 /* work */ 2528 PetscInt i,j,index_row; 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 /* MISSING CHECKS 2533 - rectangular case not covered (it is not allowed by MATIS) 2534 */ 2535 /* get info from mat */ 2536 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2537 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2538 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2539 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2540 2541 /* work */ 2542 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2543 for (i=0;i<local_rows;i++) local_indices[i]=i; 2544 /* map indices of local mat to global values */ 2545 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2546 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2547 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2548 2549 if (reuse==MAT_INITIAL_MATRIX) { 2550 Vec vec_dnz,vec_onz; 2551 PetscScalar *my_dnz,*my_onz; 2552 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2553 PetscInt index_col,owner; 2554 PetscMPIInt nsubdomains; 2555 2556 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2557 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2558 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2559 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2560 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2561 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2562 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2563 2564 /* 2565 preallocation 2566 */ 2567 2568 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2569 /* 2570 Some vectors are needed to sum up properly on shared interface dofs. 2571 Preallocation macros cannot do the job. 2572 Note that preallocation is not exact, since it overestimates nonzeros 2573 */ 2574 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2575 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2576 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2577 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2578 /* All processes need to compute entire row ownership */ 2579 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2580 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2581 for (i=0;i<nsubdomains;i++) { 2582 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2583 row_ownership[j]=i; 2584 } 2585 } 2586 2587 /* 2588 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2589 then, they will be summed up properly. This way, preallocation is always sufficient 2590 */ 2591 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2592 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2593 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2594 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2595 for (i=0;i<local_rows;i++) { 2596 index_row = global_indices[i]; 2597 for (j=i;j<local_rows;j++) { 2598 owner = row_ownership[index_row]; 2599 index_col = global_indices[j]; 2600 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2601 my_dnz[i] += 1.0; 2602 } else { /* offdiag block */ 2603 my_onz[i] += 1.0; 2604 } 2605 /* same as before, interchanging rows and cols */ 2606 if (i != j) { 2607 owner = row_ownership[index_col]; 2608 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2609 my_dnz[j] += 1.0; 2610 } else { 2611 my_onz[j] += 1.0; 2612 } 2613 } 2614 } 2615 } 2616 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2617 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2618 if (local_rows) { /* multilevel guard */ 2619 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2620 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2621 } 2622 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2623 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2624 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2625 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2626 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2627 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2628 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2629 2630 /* set computed preallocation in dnz and onz */ 2631 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2632 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2633 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2634 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2635 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2636 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2637 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2638 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2639 2640 /* Resize preallocation if overestimated */ 2641 for (i=0;i<lrows;i++) { 2642 dnz[i] = PetscMin(dnz[i],lcols); 2643 onz[i] = PetscMin(onz[i],cols-lcols); 2644 } 2645 /* set preallocation */ 2646 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2647 for (i=0;i<lrows/bs;i++) { 2648 dnz[i] = dnz[i*bs]/bs; 2649 onz[i] = onz[i*bs]/bs; 2650 } 2651 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2652 for (i=0;i<lrows/bs;i++) { 2653 dnz[i] = dnz[i]-i; 2654 } 2655 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2656 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2657 *M = new_mat; 2658 } else { 2659 PetscInt mbs,mrows,mcols; 2660 /* some checks */ 2661 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2662 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2663 if (mrows != rows) { 2664 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2665 } 2666 if (mrows != rows) { 2667 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2668 } 2669 if (mbs != bs) { 2670 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2671 } 2672 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2673 } 2674 /* set local to global mappings */ 2675 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2676 /* Set values */ 2677 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2678 if (isdense) { /* special case for dense local matrices */ 2679 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2680 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2681 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2682 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2683 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2684 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2685 } else { /* very basic values insertion for all other matrix types */ 2686 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2687 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2688 for (i=0;i<local_rows;i++) { 2689 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2690 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2691 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2692 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2693 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2694 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2695 } 2696 } 2697 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2698 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2699 if (isdense) { 2700 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2701 } 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatISSubassemble_Private" 2707 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2708 { 2709 Mat subdomain_adj; 2710 IS new_ranks,ranks_send_to; 2711 MatPartitioning partitioner; 2712 Mat_IS *matis; 2713 PetscInt n_neighs,*neighs,*n_shared,**shared; 2714 PetscInt prank; 2715 PetscMPIInt size,rank,color; 2716 PetscInt *xadj,*adjncy,*oldranks; 2717 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2718 PetscInt i,j,n_subdomains,local_size,threshold=0; 2719 PetscErrorCode ierr; 2720 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2721 PetscSubcomm subcomm; 2722 2723 PetscFunctionBegin; 2724 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2725 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2726 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2727 2728 /* Get info on mapping */ 2729 matis = (Mat_IS*)(mat->data); 2730 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2731 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2732 2733 /* build local CSR graph of subdomains' connectivity */ 2734 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2735 xadj[0] = 0; 2736 xadj[1] = PetscMax(n_neighs-1,0); 2737 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2738 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2739 2740 if (threshold) { 2741 PetscInt* count,min_threshold; 2742 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2743 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2744 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2745 for (j=0;j<n_shared[i];j++) { 2746 count[shared[i][j]] += 1; 2747 } 2748 } 2749 /* adapt threshold since we dont want to lose significant connections */ 2750 min_threshold = n_neighs; 2751 for (i=1;i<n_neighs;i++) { 2752 for (j=0;j<n_shared[i];j++) { 2753 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2754 } 2755 } 2756 threshold = PetscMax(min_threshold+1,threshold); 2757 xadj[1] = 0; 2758 for (i=1;i<n_neighs;i++) { 2759 for (j=0;j<n_shared[i];j++) { 2760 if (count[shared[i][j]] < threshold) { 2761 adjncy[xadj[1]] = neighs[i]; 2762 adjncy_wgt[xadj[1]] = n_shared[i]; 2763 xadj[1]++; 2764 break; 2765 } 2766 } 2767 } 2768 ierr = PetscFree(count);CHKERRQ(ierr); 2769 } else { 2770 if (xadj[1]) { 2771 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2772 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2773 } 2774 } 2775 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2776 if (use_square) { 2777 for (i=0;i<xadj[1];i++) { 2778 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2779 } 2780 } 2781 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2782 2783 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2784 2785 /* 2786 Restrict work on active processes only. 2787 */ 2788 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2789 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2790 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2791 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2792 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2793 if (color) { 2794 ierr = PetscFree(xadj);CHKERRQ(ierr); 2795 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2796 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2797 } else { 2798 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2799 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2800 prank = rank; 2801 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2802 for (i=0;i<size;i++) { 2803 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2804 } 2805 for (i=0;i<xadj[1];i++) { 2806 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2807 } 2808 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2809 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2810 n_subdomains = (PetscInt)size; 2811 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2812 2813 /* Partition */ 2814 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2815 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2816 if (use_vwgt) { 2817 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2818 v_wgt[0] = local_size; 2819 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2820 } 2821 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2822 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2823 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2824 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2825 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2826 2827 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2828 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2829 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2830 /* clean up */ 2831 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2832 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2833 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2834 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2835 } 2836 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2837 2838 /* assemble parallel IS for sends */ 2839 i = 1; 2840 if (color) i=0; 2841 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2842 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2843 2844 /* get back IS */ 2845 *is_sends = ranks_send_to; 2846 PetscFunctionReturn(0); 2847 } 2848 2849 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatISSubassemble" 2853 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2854 { 2855 Mat local_mat,new_mat; 2856 Mat_IS *matis; 2857 IS is_sends_internal; 2858 PetscInt rows,cols; 2859 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2860 PetscBool ismatis,isdense; 2861 ISLocalToGlobalMapping l2gmap; 2862 PetscInt* l2gmap_indices; 2863 const PetscInt* is_indices; 2864 MatType new_local_type; 2865 MatTypePrivate new_local_type_private; 2866 /* buffers */ 2867 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2868 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2869 /* MPI */ 2870 MPI_Comm comm; 2871 PetscMPIInt n_sends,n_recvs,commsize; 2872 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2873 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2874 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2875 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2876 PetscErrorCode ierr; 2877 2878 PetscFunctionBegin; 2879 /* checks */ 2880 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2881 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2882 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2883 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2884 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2885 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2886 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2887 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2888 PetscValidLogicalCollectiveInt(mat,bs,0); 2889 /* prepare IS for sending if not provided */ 2890 if (!is_sends) { 2891 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2892 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2893 } else { 2894 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2895 is_sends_internal = is_sends; 2896 } 2897 2898 /* get pointer of MATIS data */ 2899 matis = (Mat_IS*)mat->data; 2900 2901 /* get comm */ 2902 comm = PetscObjectComm((PetscObject)mat); 2903 2904 /* compute number of sends */ 2905 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2906 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2907 2908 /* compute number of receives */ 2909 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2910 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2911 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2912 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2913 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2914 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2915 ierr = PetscFree(iflags);CHKERRQ(ierr); 2916 2917 /* prepare send/receive buffers */ 2918 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2919 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2920 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2921 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2922 2923 /* Get data from local mat */ 2924 if (!isdense) { 2925 /* TODO: See below some guidelines on how to prepare the local buffers */ 2926 /* 2927 send_buffer_vals should contain the raw values of the local matrix 2928 send_buffer_idxs should contain: 2929 - MatType_PRIVATE type 2930 - PetscInt size_of_l2gmap 2931 - PetscInt global_row_indices[size_of_l2gmap] 2932 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2933 */ 2934 } else { 2935 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2936 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2937 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2938 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2939 send_buffer_idxs[1] = i; 2940 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2941 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2942 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2943 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2944 for (i=0;i<n_sends;i++) { 2945 ilengths_vals[is_indices[i]] = len*len; 2946 ilengths_idxs[is_indices[i]] = len+2; 2947 } 2948 } 2949 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2950 buf_size_idxs = 0; 2951 buf_size_vals = 0; 2952 for (i=0;i<n_recvs;i++) { 2953 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2954 buf_size_vals += (PetscInt)olengths_vals[i]; 2955 } 2956 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2957 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2958 2959 /* get new tags for clean communications */ 2960 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2961 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2962 2963 /* allocate for requests */ 2964 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2965 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2966 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2967 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2968 2969 /* communications */ 2970 ptr_idxs = recv_buffer_idxs; 2971 ptr_vals = recv_buffer_vals; 2972 for (i=0;i<n_recvs;i++) { 2973 source_dest = onodes[i]; 2974 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2975 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2976 ptr_idxs += olengths_idxs[i]; 2977 ptr_vals += olengths_vals[i]; 2978 } 2979 for (i=0;i<n_sends;i++) { 2980 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2981 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2982 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2983 } 2984 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2985 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2986 2987 /* assemble new l2g map */ 2988 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2989 ptr_idxs = recv_buffer_idxs; 2990 buf_size_idxs = 0; 2991 for (i=0;i<n_recvs;i++) { 2992 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2993 ptr_idxs += olengths_idxs[i]; 2994 } 2995 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2996 ptr_idxs = recv_buffer_idxs; 2997 buf_size_idxs = 0; 2998 for (i=0;i<n_recvs;i++) { 2999 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3000 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3001 ptr_idxs += olengths_idxs[i]; 3002 } 3003 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3004 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3005 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3006 3007 /* infer new local matrix type from received local matrices type */ 3008 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3009 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3010 new_local_type_private = MATAIJ_PRIVATE; 3011 new_local_type = MATSEQAIJ; 3012 if (n_recvs) { 3013 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3014 ptr_idxs = recv_buffer_idxs; 3015 for (i=0;i<n_recvs;i++) { 3016 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3017 new_local_type_private = MATAIJ_PRIVATE; 3018 break; 3019 } 3020 ptr_idxs += olengths_idxs[i]; 3021 } 3022 switch (new_local_type_private) { 3023 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3024 new_local_type = MATSEQAIJ; 3025 bs = 1; 3026 break; 3027 case MATAIJ_PRIVATE: 3028 new_local_type = MATSEQAIJ; 3029 bs = 1; 3030 break; 3031 case MATBAIJ_PRIVATE: 3032 new_local_type = MATSEQBAIJ; 3033 break; 3034 case MATSBAIJ_PRIVATE: 3035 new_local_type = MATSEQSBAIJ; 3036 break; 3037 default: 3038 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3039 break; 3040 } 3041 } 3042 3043 /* create MATIS object */ 3044 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3045 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3046 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3047 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3048 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3049 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3050 3051 /* set values */ 3052 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3053 ptr_vals = recv_buffer_vals; 3054 ptr_idxs = recv_buffer_idxs; 3055 for (i=0;i<n_recvs;i++) { 3056 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3057 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3058 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3059 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3062 } 3063 ptr_idxs += olengths_idxs[i]; 3064 ptr_vals += olengths_vals[i]; 3065 } 3066 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3070 3071 { /* check */ 3072 Vec lvec,rvec; 3073 PetscReal infty_error; 3074 3075 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3076 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3077 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3078 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3079 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3080 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3081 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3082 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3083 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3084 } 3085 3086 /* free workspace */ 3087 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3088 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3089 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3090 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3091 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3092 if (isdense) { 3093 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3094 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3095 } else { 3096 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3097 } 3098 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3099 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3100 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3101 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3102 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3103 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3104 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3105 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3106 ierr = PetscFree(onodes);CHKERRQ(ierr); 3107 /* get back new mat */ 3108 *mat_n = new_mat; 3109 PetscFunctionReturn(0); 3110 } 3111 3112 #undef __FUNCT__ 3113 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3114 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3115 { 3116 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3117 PC_IS *pcis = (PC_IS*)pc->data; 3118 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3119 MatNullSpace CoarseNullSpace=NULL; 3120 ISLocalToGlobalMapping coarse_islg; 3121 IS coarse_is; 3122 PetscInt max_it; 3123 PetscInt im_active=-1,active_procs=-1; 3124 PC pc_temp; 3125 PCType coarse_pc_type; 3126 KSPType coarse_ksp_type; 3127 PetscBool multilevel_requested,multilevel_allowed; 3128 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3129 MatStructure matstruct; 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 /* Assign global numbering to coarse dofs */ 3134 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3135 PetscInt ocoarse_size; 3136 ocoarse_size = pcbddc->coarse_size; 3137 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3138 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3139 /* see if we can avoid some work */ 3140 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3141 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3142 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3143 coarse_reuse = PETSC_FALSE; 3144 } else { /* we can safely reuse already computed coarse matrix */ 3145 coarse_reuse = PETSC_TRUE; 3146 } 3147 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3148 coarse_reuse = PETSC_FALSE; 3149 } 3150 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3151 coarse_reuse = PETSC_TRUE; 3152 } 3153 3154 /* infer some info from user */ 3155 issym = PETSC_FALSE; 3156 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3157 multilevel_allowed = PETSC_FALSE; 3158 multilevel_requested = PETSC_FALSE; 3159 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3160 if (multilevel_requested) { 3161 /* count "active processes" */ 3162 im_active = !!(pcis->n); 3163 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3164 if (active_procs/pcbddc->coarsening_ratio < 2) { 3165 multilevel_allowed = PETSC_FALSE; 3166 } else { 3167 multilevel_allowed = PETSC_TRUE; 3168 } 3169 } 3170 3171 /* set defaults for coarse KSP and PC */ 3172 if (multilevel_allowed) { 3173 if (issym) { 3174 coarse_ksp_type = KSPRICHARDSON; 3175 } else { 3176 coarse_ksp_type = KSPCHEBYSHEV; 3177 } 3178 coarse_pc_type = PCBDDC; 3179 } else { 3180 coarse_ksp_type = KSPPREONLY; 3181 coarse_pc_type = PCREDUNDANT; 3182 } 3183 3184 /* create the coarse KSP object only once with defaults */ 3185 if (!pcbddc->coarse_ksp) { 3186 char prefix[256],str_level[3]; 3187 size_t len; 3188 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3189 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3190 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3191 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3192 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3193 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3194 /* prefix */ 3195 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3196 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3197 if (!pcbddc->current_level) { 3198 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3199 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3200 } else { 3201 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3202 if (pcbddc->current_level>1) len -= 2; 3203 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3204 *(prefix+len)='\0'; 3205 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3206 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3207 } 3208 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3209 } 3210 /* allow user customization */ 3211 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3212 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3213 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3214 3215 /* get some info after set from options */ 3216 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3217 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3218 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3219 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3220 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3221 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3222 isbddc = PETSC_FALSE; 3223 } 3224 3225 /* propagate BDDC info to the next level */ 3226 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3227 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3228 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3229 3230 /* creates temporary MATIS object for coarse matrix */ 3231 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3232 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3234 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3235 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3236 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3237 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3239 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3240 3241 /* assemble coarse matrix */ 3242 if (isbddc || isnn) { 3243 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3244 } else { 3245 if (coarse_reuse) { 3246 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3247 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3248 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3249 } else { 3250 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3251 } 3252 } 3253 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3254 3255 /* create local to global scatters for coarse problem */ 3256 if (pcbddc->new_primal_space) { 3257 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3258 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3259 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3260 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3261 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3262 } 3263 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3264 3265 /* propagate symmetry info to coarse matrix */ 3266 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3267 3268 /* Compute coarse null space (special handling by BDDC only) */ 3269 if (pcbddc->NullSpace) { 3270 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3271 if (isbddc) { 3272 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3273 } else { 3274 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3275 } 3276 } 3277 3278 /* set operators */ 3279 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3280 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3281 3282 /* additional KSP customization */ 3283 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3284 if (max_it < 5) { 3285 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3286 } 3287 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3288 3289 3290 /* print some info if requested */ 3291 if (pcbddc->dbg_flag) { 3292 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3293 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3294 if (!multilevel_allowed) { 3295 if (multilevel_requested) { 3296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3297 } else if (pcbddc->max_levels) { 3298 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3299 } 3300 } 3301 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3302 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3303 } 3304 3305 /* setup coarse ksp */ 3306 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3307 if (pcbddc->dbg_flag) { 3308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3310 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3312 } 3313 3314 /* Check coarse problem if requested */ 3315 if (pcbddc->dbg_flag) { 3316 KSP check_ksp; 3317 KSPType check_ksp_type; 3318 PC check_pc; 3319 Vec check_vec; 3320 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3321 PetscInt its; 3322 PetscBool ispreonly,compute; 3323 3324 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3325 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3326 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3327 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3328 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3329 if (ispreonly) { 3330 check_ksp_type = KSPPREONLY; 3331 compute = PETSC_FALSE; 3332 } else { 3333 if (issym) check_ksp_type = KSPCG; 3334 else check_ksp_type = KSPGMRES; 3335 compute = PETSC_TRUE; 3336 } 3337 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3338 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3339 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3340 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3341 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3342 /* create random vec */ 3343 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3344 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3345 if (CoarseNullSpace) { 3346 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3347 } 3348 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3349 /* solve coarse problem */ 3350 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3351 if (CoarseNullSpace) { 3352 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3353 } 3354 /* check coarse problem residual error */ 3355 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3356 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3357 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3358 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3359 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3360 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3361 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3362 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3363 /* get eigenvalue estimation if preonly has not been requested */ 3364 if (!ispreonly) { 3365 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3366 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3367 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3368 } 3369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3370 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3371 } 3372 /* free memory */ 3373 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3374 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3375 PetscFunctionReturn(0); 3376 } 3377 3378 #undef __FUNCT__ 3379 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3380 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3381 { 3382 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3383 PC_IS* pcis = (PC_IS*)pc->data; 3384 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3385 PetscInt i,coarse_size; 3386 PetscInt *local_primal_indices; 3387 PetscErrorCode ierr; 3388 3389 PetscFunctionBegin; 3390 /* Compute global number of coarse dofs */ 3391 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3392 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3393 } 3394 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3395 3396 /* check numbering */ 3397 if (pcbddc->dbg_flag) { 3398 PetscScalar coarsesum,*array; 3399 3400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3401 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3402 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3403 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3404 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3405 for (i=0;i<pcbddc->local_primal_size;i++) { 3406 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3407 } 3408 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3409 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3410 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3411 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3412 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3413 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3416 for (i=0;i<pcis->n;i++) { 3417 if (array[i] == 1.0) { 3418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3419 } 3420 } 3421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3422 for (i=0;i<pcis->n;i++) { 3423 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3424 } 3425 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3426 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3427 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3428 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3429 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3431 if (pcbddc->dbg_flag > 1) { 3432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3435 for (i=0;i<pcbddc->local_primal_size;i++) { 3436 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3437 } 3438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3439 } 3440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3441 } 3442 /* get back data */ 3443 *coarse_size_n = coarse_size; 3444 *local_primal_indices_n = local_primal_indices; 3445 PetscFunctionReturn(0); 3446 } 3447 3448 #undef __FUNCT__ 3449 #define __FUNCT__ "PCBDDCGlobalToLocal" 3450 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3451 { 3452 PC_IS* pcis = (PC_IS*)pc->data; 3453 IS localis_t; 3454 PetscInt i,lsize,*idxs; 3455 PetscScalar *vals; 3456 PetscErrorCode ierr; 3457 3458 PetscFunctionBegin; 3459 /* get dirichlet indices in local ordering exploiting local to global map */ 3460 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3461 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3462 for (i=0;i<lsize;i++) vals[i] = 1.0; 3463 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3464 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3465 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3466 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3467 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3468 ierr = PetscFree(vals);CHKERRQ(ierr); 3469 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3470 /* now compute dirichlet set in local ordering */ 3471 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3472 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3473 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3474 for (i=0,lsize=0;i<pcis->n;i++) { 3475 if (vals[i] == 1.0) { 3476 lsize++; 3477 } 3478 } 3479 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3480 for (i=0,lsize=0;i<pcis->n;i++) { 3481 if (vals[i] == 1.0) { 3482 idxs[lsize++] = i; 3483 } 3484 } 3485 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3486 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3487 *localis = localis_t; 3488 PetscFunctionReturn(0); 3489 } 3490